diff --git a/README.md b/README.md index 77b2980..6cd9452 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ A distributed system for processing Ethereum execution layer data with support f ### Core Components - **Ethereum Nodes**: Configure execution node endpoints -- **Redis**: Task queue and leader election coordination +- **Redis**: Task queue and leader election coordination - **State Manager**: Track processing progress in ClickHouse - **Processors**: Configure structlog extraction settings @@ -57,6 +57,96 @@ A distributed system for processing Ethereum execution layer data with support f └─────────────────────────────────────────┘ ``` +## Embedded Mode (Library Usage) + +The execution-processor can be embedded as a library within an execution client, providing direct data access without JSON-RPC overhead. + +### Implementing DataSource + +```go +import ( + "context" + "math/big" + + "github.com/ethpandaops/execution-processor/pkg/ethereum/execution" +) + +type MyDataSource struct { + client *MyExecutionClient +} + +func (ds *MyDataSource) BlockNumber(ctx context.Context) (*uint64, error) { + num := ds.client.CurrentBlock() + return &num, nil +} + +func (ds *MyDataSource) BlockByNumber(ctx context.Context, number *big.Int) (execution.Block, error) { + return ds.client.GetBlock(number), nil // GetBlock returns a type implementing execution.Block +} + +func (ds *MyDataSource) BlocksByNumbers(ctx context.Context, numbers []*big.Int) ([]execution.Block, error) { + return ds.client.GetBlocks(numbers), nil // return a contiguous prefix of found blocks +} + +func (ds *MyDataSource) BlockReceipts(ctx context.Context, number *big.Int) ([]execution.Receipt, error) { + return ds.client.GetBlockReceipts(number), nil +} + +func (ds *MyDataSource) TransactionReceipt(ctx context.Context, hash string) (execution.Receipt, error) { + return ds.client.GetReceipt(hash), nil +} + +func (ds *MyDataSource) DebugTraceTransaction( + ctx context.Context, + hash string, + blockNumber *big.Int, + opts execution.TraceOptions, +) (*execution.TraceTransaction, error) { + return ds.client.TraceTransaction(hash, opts), nil +} + +func (ds *MyDataSource) ChainID() int64 { + return ds.client.ChainID() +} + +func (ds *MyDataSource) ClientType() string { + return "my-client/1.0.0" +} + +func (ds *MyDataSource) IsSynced() bool { + return ds.client.IsSynced() +} +``` + +### Creating an Embedded Pool + +```go +import ( + "github.com/ethpandaops/execution-processor/pkg/ethereum" + "github.com/ethpandaops/execution-processor/pkg/ethereum/execution" +) + +// Create embedded node with your data source +dataSource := &MyDataSource{client: myClient} +node := execution.NewEmbeddedNode(log, "embedded", dataSource) + +// Create pool with the embedded node +pool := ethereum.NewPoolWithNodes(log, "processor", []execution.Node{node}, nil) +pool.Start(ctx) + +// Mark ready when your client is synced and ready to serve data +node.MarkReady(ctx) +``` + +### Embedded vs RPC Mode + +| Aspect | RPC Mode | Embedded Mode | +|--------|----------|---------------| +| Data Access | JSON-RPC over HTTP | Direct function calls | +| Readiness | Auto-detected via RPC health checks | Host calls MarkReady() | +| Performance | Network + serialization overhead | Zero serialization overhead | +| Use Case | External execution clients | Library integration | + ## Manual Block Queue API The execution processor provides an HTTP API for manually queuing blocks for reprocessing. This is useful for fixing data issues or reprocessing specific blocks.
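Putting the two embedded-mode snippets above together, a host client's startup might look roughly like the sketch below. `MyExecutionClient`, `MyDataSource`, and the `SubscribeSynced` hook are hypothetical host-side names; only `NewEmbeddedNode`, `NewPoolWithNodes`, `Start`, and `MarkReady` are APIs from this change.

```go
// Sketch only: wiring the embedded pool into a host client's startup.
// MyExecutionClient, MyDataSource, and SubscribeSynced are hypothetical;
// the execution-processor calls are the ones documented above.
import (
	"context"

	"github.com/ethpandaops/execution-processor/pkg/ethereum"
	"github.com/ethpandaops/execution-processor/pkg/ethereum/execution"
	"github.com/sirupsen/logrus"
)

func startProcessorPool(ctx context.Context, log logrus.FieldLogger, client *MyExecutionClient) *execution.EmbeddedNode {
	node := execution.NewEmbeddedNode(log, "embedded", &MyDataSource{client: client})

	pool := ethereum.NewPoolWithNodes(log, "processor", []execution.Node{node}, nil)
	pool.Start(ctx)

	// Hold readiness back until the host reports it is synced; the pool only
	// treats the node as healthy after MarkReady has run its OnReady callbacks.
	client.SubscribeSynced(func() {
		if err := node.MarkReady(ctx); err != nil {
			log.WithError(err).Error("failed to mark embedded node ready")
		}
	})

	return node
}
```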
@@ -80,7 +167,7 @@ curl -X POST http://localhost:8080/api/v1/queue/block/transaction_structlog/1234 "status": "queued", "block_number": 12345, "processor": "transaction_structlog", - "queue": "process:forwards", + "queue": "process:forwards", "transaction_count": 150, "tasks_created": 150 } @@ -158,7 +245,7 @@ curl -X POST http://localhost:8080/api/v1/queue/blocks/transaction_structlog \ # Run tests go test ./... -# Run with race detector +# Run with race detector go test ./... --race # Build @@ -167,4 +254,4 @@ go build . ## License -See LICENSE file. \ No newline at end of file +See LICENSE file. diff --git a/example_config.yaml b/example_config.yaml index 03b4efd..a6734f4 100644 --- a/example_config.yaml +++ b/example_config.yaml @@ -50,18 +50,21 @@ processors: addr: "localhost:9000" database: "default" table: "canonical_execution_transaction_structlog" - # debug: false # Enable debug logging for ClickHouse queries - # maxPendingBlockRange: 2 # Max distance between oldest incomplete and current block (default: 2) - # Channel-based batching configuration for memory-efficient processing - # bigTransactionThreshold: 500000 # Transactions with more structlogs are considered "big" (default: 500000) - # chunkSize: 10000 # Number of structlogs per batch (default: 10000) - # channelBufferSize: 2 # Number of chunks to buffer in channel (default: 2) - # progressLogThreshold: 100000 # Log progress every N structlogs for large transactions (default: 100000) + # debug: false # Enable debug logging for ClickHouse queries + # maxPendingBlockRange: 2 # Max distance between oldest incomplete and current block (default: 2) + # bufferMaxRows: 100000 # Max rows before flush (default: 100000) + # bufferFlushInterval: "1s" # Max time before flush (default: 1s) - # Small transaction batching configuration - # batchInsertThreshold: 50000 # Transactions with fewer structlogs than this will be batched (default: 50000) - # batchFlushInterval: 5s # Maximum time to wait before flushing a batch (default: 5s) - # batchMaxSize: 100000 # Maximum number of structlogs to accumulate in a batch (default: 100000) + # Aggregated structlog processor (call frame level aggregation) + transactionStructlogAgg: + enabled: false + addr: "localhost:9000" + database: "default" + table: "canonical_execution_transaction_structlog_agg" + # debug: false # Enable debug logging for ClickHouse queries + # maxPendingBlockRange: 2 # Max distance between oldest incomplete and current block (default: 2) + # bufferMaxRows: 100000 # Max rows before flush (default: 100000) + # bufferFlushInterval: "1s" # Max time before flush (default: 1s) # Simple transaction processor (lightweight - no debug traces) transactionSimple: @@ -69,8 +72,10 @@ processors: addr: "localhost:9000" database: "default" table: "execution_transaction" - # debug: false # Enable debug logging for ClickHouse queries - # maxPendingBlockRange: 2 # Max distance between oldest incomplete and current block (default: 2) + # debug: false # Enable debug logging for ClickHouse queries + # maxPendingBlockRange: 2 # Max distance between oldest incomplete and current block (default: 2) + # bufferMaxRows: 100000 # Max rows before flush (default: 100000) + # bufferFlushInterval: "1s" # Max time before flush (default: 1s) # Application settings shutdownTimeout: 6m diff --git a/go.mod b/go.mod index dc38a0d..8a32246 100644 --- a/go.mod +++ b/go.mod @@ -9,10 +9,11 @@ require ( github.com/creasty/defaults v1.8.0 github.com/ethereum/go-ethereum v1.16.7 github.com/go-co-op/gocron v1.37.0 + 
github.com/go-redsync/redsync/v4 v4.15.0 github.com/hibiken/asynq v0.25.1 github.com/lib/pq v1.10.9 github.com/prometheus/client_golang v1.20.5 - github.com/redis/go-redis/v9 v9.14.0 + github.com/redis/go-redis/v9 v9.17.2 github.com/sirupsen/logrus v1.9.3 github.com/spf13/cobra v1.10.1 github.com/stretchr/testify v1.11.1 @@ -60,6 +61,8 @@ require ( github.com/google/uuid v1.6.0 // indirect github.com/gorilla/websocket v1.5.3 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.5 // indirect + github.com/hashicorp/errwrap v1.1.0 // indirect + github.com/hashicorp/go-multierror v1.1.1 // indirect github.com/hashicorp/go-version v1.7.0 // indirect github.com/holiman/uint256 v1.3.2 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect diff --git a/go.sum b/go.sum index c9e71e0..e165a32 100644 --- a/go.sum +++ b/go.sum @@ -124,6 +124,14 @@ github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE= github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78= +github.com/go-redis/redis v6.15.9+incompatible h1:K0pv1D7EQUjfyoMql+r/jZqCLizCGKFlFgcHWWmHQjg= +github.com/go-redis/redis v6.15.9+incompatible/go.mod h1:NAIEuMOZ/fxfXJIrKDQDz8wamY7mA7PouImQ2Jvg6kA= +github.com/go-redis/redis/v7 v7.4.1 h1:PASvf36gyUpr2zdOUS/9Zqc80GbM+9BDyiJSJDDOrTI= +github.com/go-redis/redis/v7 v7.4.1/go.mod h1:JDNMw23GTyLNC4GZu9njt15ctBQVn7xjRfnwdHj/Dcg= +github.com/go-redis/redis/v8 v8.11.5 h1:AcZZR7igkdvfVmQTPnu9WE37LRrO/YrBH5zWyjDC0oI= +github.com/go-redis/redis/v8 v8.11.5/go.mod h1:gREzHqY1hg6oD9ngVRbLStwAWKhA0FEgq8Jd4h5lpwo= +github.com/go-redsync/redsync/v4 v4.15.0 h1:KH/XymuxSV7vyKs6z1Cxxj+N+N18JlPxgXeP6x4JY54= +github.com/go-redsync/redsync/v4 v4.15.0/go.mod h1:qNp+lLs3vkfZbtA/aM/OjlZHfEr5YTAYhRktFPKHC7s= github.com/gofrs/flock v0.12.1 h1:MTLVXXHf8ekldpJk3AKicLij9MdwOWkZ+a/jHHZby9E= github.com/gofrs/flock v0.12.1/go.mod h1:9zxTsyu5xtJ9DK+1tFZyibEV7y3uwDxPPfbxeeHCoD0= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= @@ -132,6 +140,8 @@ github.com/golang-jwt/jwt/v4 v4.5.2 h1:YtQM7lnr8iZ+j5q71MGKkNw9Mn7AjHM68uc9g5fXe github.com/golang-jwt/jwt/v4 v4.5.2/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0= github.com/golang/snappy v1.0.0 h1:Oy607GVXHs7RtbggtPBnr2RmDArIsAefDwvrdWvRhGs= github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/gomodule/redigo v1.9.3 h1:dNPSXeXv6HCq2jdyWfjgmhBdqnR6PRO3m/G05nvpPC8= +github.com/gomodule/redigo v1.9.3/go.mod h1:KsU3hiK/Ay8U42qpaJk+kuNa3C+spxapWpM+ywhcgtw= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/uuid v1.4.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -141,8 +151,13 @@ github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aN github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.5 h1:jP1RStw811EvUDzsUQ9oESqw2e4RqCjSAD9qIL8eMns= github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.5/go.mod h1:WXNBZ64q3+ZUemCMXD9kYnr56H7CgZxDBHCVwstfl3s= +github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= 
+github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-bexpr v0.1.10 h1:9kuI5PFotCboP3dkDYFr/wi0gg0QVbSNz5oFRpxn4uE= github.com/hashicorp/go-bexpr v0.1.10/go.mod h1:oxlubA2vC/gFVfX1A6JGp7ls7uCDlfJn732ehYYg+g0= +github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= +github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= github.com/hashicorp/go-version v1.7.0 h1:5tqGy27NaOTB8yJKUZELlFAS/LTKJkrmONwQKeRZfjY= github.com/hashicorp/go-version v1.7.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/hibiken/asynq v0.25.1 h1:phj028N0nm15n8O2ims+IvJ2gz4k2auvermngh9JhTw= @@ -255,8 +270,12 @@ github.com/prometheus/common v0.64.0 h1:pdZeA+g617P7oGv1CzdTzyeShxAGrTBsolKNOLQP github.com/prometheus/common v0.64.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8= github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= -github.com/redis/go-redis/v9 v9.14.0 h1:u4tNCjXOyzfgeLN+vAZaW1xUooqWDqVEsZN0U01jfAE= -github.com/redis/go-redis/v9 v9.14.0/go.mod h1:huWgSWd8mW6+m0VPhJjSSQ+d6Nh1VICQ6Q5lHuCH/Iw= +github.com/redis/go-redis/v9 v9.17.2 h1:P2EGsA4qVIM3Pp+aPocCJ7DguDHhqrXNhVcEp4ViluI= +github.com/redis/go-redis/v9 v9.17.2/go.mod h1:u410H11HMLoB+TP67dz8rL9s6QW2j76l0//kSOd3370= +github.com/redis/rueidis v1.0.69 h1:WlUefRhuDekji5LsD387ys3UCJtSFeBVf0e5yI0B8b4= +github.com/redis/rueidis v1.0.69/go.mod h1:Lkhr2QTgcoYBhxARU7kJRO8SyVlgUuEkcJO1Y8MCluA= +github.com/redis/rueidis/rueidiscompat v1.0.69 h1:IWVYY9lXdjNO3do2VpJT7aDFi8zbCUuQxZB6E2Grahs= +github.com/redis/rueidis/rueidiscompat v1.0.69/go.mod h1:iC4Y8DoN0Uth0Uezg9e2trvNRC7QAgGeuP2OPLb5ccI= github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= @@ -299,6 +318,8 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/stvp/tempredis v0.0.0-20181119212430-b82af8480203 h1:QVqDTf3h2WHt08YuiTGPZLls0Wq99X9bWd0Q5ZSBesM= +github.com/stvp/tempredis v0.0.0-20181119212430-b82af8480203/go.mod h1:oqN97ltKNihBbwlX8dLpwxCl3+HnXKV/R0e+sRLd9C8= github.com/supranational/blst v0.3.16 h1:bTDadT+3fK497EvLdWRQEjiGnUtzJ7jjIUMF0jqwYhE= github.com/supranational/blst v0.3.16/go.mod h1:jZJtfjgudtNl4en1tzwPIV3KjUnQUvG3/j+w+fVonLw= github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7 h1:epCh84lMvA70Z7CTTCmYQn2CKbY8j86K7/FAIr141uY= diff --git a/pkg/common/metrics.go b/pkg/common/metrics.go index f452116..4ed8f94 100644 --- a/pkg/common/metrics.go +++ b/pkg/common/metrics.go @@ -205,4 +205,32 @@ var ( Name: "execution_processor_clickhouse_pool_empty_acquire_wait_duration_seconds", Help: "Cumulative time spent waiting for a resource when pool was empty", }, []string{"network", "processor"}) + + // Row buffer metrics for batched ClickHouse inserts. 
+ RowBufferFlushTotal = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "execution_processor_row_buffer_flush_total", + Help: "Total number of row buffer flushes", + }, []string{"network", "processor", "table", "trigger", "status"}) + + RowBufferFlushDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{ + Name: "execution_processor_row_buffer_flush_duration_seconds", + Help: "Duration of row buffer flushes", + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), + }, []string{"network", "processor", "table"}) + + RowBufferFlushSize = promauto.NewHistogramVec(prometheus.HistogramOpts{ + Name: "execution_processor_row_buffer_flush_size_rows", + Help: "Number of rows per flush", + Buckets: prometheus.ExponentialBuckets(100, 2, 12), + }, []string{"network", "processor", "table"}) + + RowBufferPendingRows = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Name: "execution_processor_row_buffer_pending_rows", + Help: "Current number of rows waiting in the buffer", + }, []string{"network", "processor", "table"}) + + RowBufferPendingTasks = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Name: "execution_processor_row_buffer_pending_tasks", + Help: "Current number of tasks waiting for their rows to be flushed", + }, []string{"network", "processor", "table"}) ) diff --git a/pkg/config/config.go b/pkg/config/config.go new file mode 100644 index 0000000..f20d0a7 --- /dev/null +++ b/pkg/config/config.go @@ -0,0 +1,84 @@ +// Package config provides configuration types for execution-processor. +// This package is designed to be imported without pulling in go-ethereum dependencies, +// making it suitable for embedded mode integrations. +package config + +import ( + "fmt" + "time" + + "github.com/ethpandaops/execution-processor/pkg/ethereum/execution" + "github.com/ethpandaops/execution-processor/pkg/processor" + "github.com/ethpandaops/execution-processor/pkg/redis" + "github.com/ethpandaops/execution-processor/pkg/state" +) + +// EthereumConfig is the ethereum network configuration. +// This is a copy of ethereum.Config to avoid importing pkg/ethereum +// which would pull in go-ethereum dependencies. +type EthereumConfig struct { + // Execution configuration + Execution []*execution.Config `yaml:"execution"` + // Override network name for custom networks (bypasses networkMap) + OverrideNetworkName *string `yaml:"overrideNetworkName"` +} + +// Validate validates the ethereum configuration. +func (c *EthereumConfig) Validate() error { + for i, exec := range c.Execution { + if err := exec.Validate(); err != nil { + return fmt.Errorf("invalid execution configuration at index %d: %w", i, err) + } + } + + return nil +} + +// Config is the main configuration for execution-processor. +type Config struct { + // MetricsAddr is the address to listen on for metrics. + MetricsAddr string `yaml:"metricsAddr" default:":9090"` + // HealthCheckAddr is the address to listen on for healthcheck. + HealthCheckAddr *string `yaml:"healthCheckAddr"` + // PProfAddr is the address to listen on for pprof. + PProfAddr *string `yaml:"pprofAddr"` + // APIAddr is the address to listen on for the API server. + APIAddr *string `yaml:"apiAddr"` + // LoggingLevel is the logging level to use. + LoggingLevel string `yaml:"logging" default:"info"` + // Ethereum is the ethereum network configuration. + Ethereum EthereumConfig `yaml:"ethereum"` + // Redis is the redis configuration. + Redis *redis.Config `yaml:"redis"` + // StateManager is the state manager configuration. 
+ StateManager state.Config `yaml:"stateManager"` + // Processors is the processor configuration. + Processors processor.Config `yaml:"processors"` + // ShutdownTimeout is the timeout for shutting down the server. + ShutdownTimeout time.Duration `yaml:"shutdownTimeout" default:"10s"` +} + +// Validate validates the configuration. +func (c *Config) Validate() error { + if c.Redis == nil { + return fmt.Errorf("redis configuration is required") + } + + if err := c.Redis.Validate(); err != nil { + return fmt.Errorf("invalid redis configuration: %w", err) + } + + if err := c.Ethereum.Validate(); err != nil { + return fmt.Errorf("invalid ethereum configuration: %w", err) + } + + if err := c.StateManager.Validate(); err != nil { + return fmt.Errorf("invalid state manager configuration: %w", err) + } + + if err := c.Processors.Validate(); err != nil { + return fmt.Errorf("invalid processor configuration: %w", err) + } + + return nil +} diff --git a/pkg/ethereum/execution/block.go b/pkg/ethereum/execution/block.go new file mode 100644 index 0000000..e4e839d --- /dev/null +++ b/pkg/ethereum/execution/block.go @@ -0,0 +1,136 @@ +package execution + +import "math/big" + +// Hash represents a 32-byte hash. +type Hash [32]byte + +// Hex returns the hex string representation of the hash. +func (h Hash) Hex() string { + return "0x" + encodeHex(h[:]) +} + +// String returns the hex string representation of the hash. +func (h Hash) String() string { + return h.Hex() +} + +// Address represents a 20-byte Ethereum address. +type Address [20]byte + +// Hex returns the hex string representation of the address with checksum. +func (a Address) Hex() string { + return "0x" + encodeHex(a[:]) +} + +// String returns the hex string representation of the address. +func (a Address) String() string { + return a.Hex() +} + +// encodeHex encodes bytes as hex string without 0x prefix. +func encodeHex(b []byte) string { + const hexChars = "0123456789abcdef" + + result := make([]byte, len(b)*2) + + for i, v := range b { + result[i*2] = hexChars[v>>4] + result[i*2+1] = hexChars[v&0x0f] + } + + return string(result) +} + +// Transaction type constants matching go-ethereum values. +const ( + LegacyTxType = 0 + AccessListTxType = 1 + DynamicFeeTxType = 2 + BlobTxType = 3 +) + +// Block interface defines methods for accessing block data. +// Implementations are provided by data sources (RPC, embedded clients). +type Block interface { + // Number returns the block number. + Number() *big.Int + + // Hash returns the block hash. + Hash() Hash + + // ParentHash returns the parent block hash. + ParentHash() Hash + + // BaseFee returns the base fee per gas (EIP-1559), or nil for pre-London blocks. + BaseFee() *big.Int + + // Transactions returns all transactions in the block. + Transactions() []Transaction +} + +// Transaction interface defines methods for accessing transaction data. +// The From() method returns the sender address, computed by the data source +// using its own crypto implementation (avoiding go-ethereum crypto imports). +type Transaction interface { + // Hash returns the transaction hash. + Hash() Hash + + // Type returns the transaction type (0=legacy, 1=access list, 2=dynamic fee, 3=blob). + Type() uint8 + + // To returns the recipient address, or nil for contract creation. + To() *Address + + // From returns the sender address. + // This is computed by the data source using types.Sender() or equivalent. + From() Address + + // Nonce returns the sender account nonce. 
+ Nonce() uint64 + + // Gas returns the gas limit. + Gas() uint64 + + // GasPrice returns the gas price (for legacy transactions). + GasPrice() *big.Int + + // GasTipCap returns the max priority fee per gas (EIP-1559). + GasTipCap() *big.Int + + // GasFeeCap returns the max fee per gas (EIP-1559). + GasFeeCap() *big.Int + + // Value returns the value transferred in wei. + Value() *big.Int + + // Data returns the input data (calldata). + Data() []byte + + // Size returns the encoded transaction size in bytes. + Size() uint64 + + // ChainId returns the chain ID, or nil for legacy transactions. + ChainId() *big.Int + + // BlobGas returns the blob gas used (for blob transactions). + BlobGas() uint64 + + // BlobGasFeeCap returns the max blob fee per gas (for blob transactions). + BlobGasFeeCap() *big.Int + + // BlobHashes returns the versioned hashes (for blob transactions). + BlobHashes() []Hash +} + +// Receipt interface defines methods for accessing transaction receipt data. +type Receipt interface { + // Status returns the transaction status (1=success, 0=failure). + Status() uint64 + + // TxHash returns the transaction hash. + TxHash() Hash + + // GasUsed returns the gas used by the transaction. + GasUsed() uint64 +} diff --git a/pkg/ethereum/execution/embedded_node.go b/pkg/ethereum/execution/embedded_node.go new file mode 100644 index 0000000..ab97c2f --- /dev/null +++ b/pkg/ethereum/execution/embedded_node.go @@ -0,0 +1,220 @@ +package execution + +import ( + "context" + "math/big" + "sync" + + "github.com/sirupsen/logrus" +) + +// DataSource is the interface host applications implement to provide +// execution data directly without JSON-RPC. This enables embedding +// execution-processor as a library within an execution client. +// +// All methods must be safe for concurrent calls from multiple goroutines. +// Context cancellation should be respected for all I/O operations. +// +// The interface uses abstract types (Block, Transaction, Receipt) instead of +// go-ethereum types to avoid CGO dependencies. Host applications should +// implement these interfaces with their own types. +// +// Example implementation: +// +// type MyDataSource struct { +// client *MyExecutionClient +// } +// +// func (ds *MyDataSource) BlockNumber(ctx context.Context) (*uint64, error) { +// num := ds.client.CurrentBlock() +// return &num, nil +// } +type DataSource interface { + // BlockNumber returns the current block number. + BlockNumber(ctx context.Context) (*uint64, error) + + // BlockByNumber returns the block at the given number. + BlockByNumber(ctx context.Context, number *big.Int) (Block, error) + + // BlocksByNumbers returns blocks at the given numbers. + // Returns blocks up to the first not-found (contiguous only). + BlocksByNumbers(ctx context.Context, numbers []*big.Int) ([]Block, error) + + // BlockReceipts returns all receipts for the block at the given number. + BlockReceipts(ctx context.Context, number *big.Int) ([]Receipt, error) + + // TransactionReceipt returns the receipt for the transaction with the given hash. + TransactionReceipt(ctx context.Context, hash string) (Receipt, error) + + // DebugTraceTransaction returns the execution trace for the transaction. + DebugTraceTransaction(ctx context.Context, hash string, blockNumber *big.Int, opts TraceOptions) (*TraceTransaction, error) + + // ChainID returns the chain ID. + ChainID() int64 + + // ClientType returns the client type/version string. + ClientType() string + + // IsSynced returns true if the data source is fully synced. 
+ IsSynced() bool +} + +// Compile-time check that EmbeddedNode implements Node interface. +var _ Node = (*EmbeddedNode)(nil) + +// EmbeddedNode implements Node by delegating to a DataSource. +// This allows host applications to provide execution data directly +// without going through JSON-RPC, eliminating serialization overhead. +// +// Lifecycle: +// 1. Create with NewEmbeddedNode(log, name, dataSource) +// 2. Register OnReady callbacks (optional) +// 3. Pool calls Start() (no-op for embedded) +// 4. Host calls MarkReady() when DataSource is ready to serve data +// 5. Callbacks execute in registration order, node becomes healthy in pool +// 6. Pool calls Stop() on shutdown (no-op for embedded) +// +// Thread-safety: All methods are safe for concurrent use. +type EmbeddedNode struct { + log logrus.FieldLogger + name string + source DataSource + ready bool + onReadyCallbacks []func(ctx context.Context) error + mu sync.RWMutex +} + +// NewEmbeddedNode creates a new EmbeddedNode with the given DataSource. +// +// Parameters: +// - log: Logger for node operations +// - name: Human-readable name for this node (used in logs and metrics) +// - source: DataSource implementation providing execution data +// +// The returned node is not yet ready. Call MarkReady() when the DataSource +// is ready to serve data. +func NewEmbeddedNode(log logrus.FieldLogger, name string, source DataSource) *EmbeddedNode { + return &EmbeddedNode{ + log: log.WithFields(logrus.Fields{"type": "execution", "source": name, "mode": "embedded"}), + name: name, + source: source, + onReadyCallbacks: make([]func(ctx context.Context) error, 0), + } +} + +// Start is a no-op for EmbeddedNode. The host controls readiness via MarkReady(). +func (n *EmbeddedNode) Start(_ context.Context) error { + n.log.Info("EmbeddedNode started - waiting for host to call MarkReady()") + + return nil +} + +// Stop is a no-op for EmbeddedNode. The host manages the DataSource lifecycle. +func (n *EmbeddedNode) Stop(_ context.Context) error { + n.log.Info("EmbeddedNode stopped") + + return nil +} + +// MarkReady is called by the host application when the DataSource is ready. +// This triggers all registered OnReady callbacks. +func (n *EmbeddedNode) MarkReady(ctx context.Context) error { + n.mu.Lock() + n.ready = true + callbacks := n.onReadyCallbacks + n.mu.Unlock() + + n.log.WithField("callback_count", len(callbacks)).Info("EmbeddedNode marked as ready, executing callbacks") + + for i, cb := range callbacks { + n.log.WithField("callback_index", i).Info("Executing OnReady callback") + + if err := cb(ctx); err != nil { + n.log.WithError(err).Error("Failed to execute OnReady callback") + + return err + } + } + + return nil +} + +// OnReady registers a callback to be called when the node becomes ready. +func (n *EmbeddedNode) OnReady(_ context.Context, callback func(ctx context.Context) error) { + n.mu.Lock() + defer n.mu.Unlock() + + n.onReadyCallbacks = append(n.onReadyCallbacks, callback) +} + +// IsReady returns true if the node has been marked as ready. +func (n *EmbeddedNode) IsReady() bool { + n.mu.RLock() + defer n.mu.RUnlock() + + return n.ready +} + +// BlockNumber delegates to the DataSource. +func (n *EmbeddedNode) BlockNumber(ctx context.Context) (*uint64, error) { + return n.source.BlockNumber(ctx) +} + +// BlockByNumber delegates to the DataSource. +func (n *EmbeddedNode) BlockByNumber(ctx context.Context, number *big.Int) (Block, error) { + return n.source.BlockByNumber(ctx, number) +} + +// BlocksByNumbers delegates to the DataSource. 
+func (n *EmbeddedNode) BlocksByNumbers(ctx context.Context, numbers []*big.Int) ([]Block, error) { + return n.source.BlocksByNumbers(ctx, numbers) +} + +// BlockReceipts delegates to the DataSource. +func (n *EmbeddedNode) BlockReceipts(ctx context.Context, number *big.Int) ([]Receipt, error) { + return n.source.BlockReceipts(ctx, number) +} + +// TransactionReceipt delegates to the DataSource. +func (n *EmbeddedNode) TransactionReceipt(ctx context.Context, hash string) (Receipt, error) { + return n.source.TransactionReceipt(ctx, hash) +} + +// DebugTraceTransaction delegates to the DataSource. +// +// OPTIMIZATION: In embedded mode, the tracer extracts CallToAddress directly +// for CALL-family opcodes instead of capturing the full stack. We explicitly +// set DisableStack: true to signal this intent, even though the tracer ignores +// this setting (it always uses the optimized path). +func (n *EmbeddedNode) DebugTraceTransaction( + ctx context.Context, + hash string, + blockNumber *big.Int, + opts TraceOptions, +) (*TraceTransaction, error) { + // Override DisableStack for embedded mode optimization. + // The tracer extracts CallToAddress directly, so full stack capture is unnecessary. + opts.DisableStack = true + + return n.source.DebugTraceTransaction(ctx, hash, blockNumber, opts) +} + +// ChainID delegates to the DataSource. +func (n *EmbeddedNode) ChainID() int64 { + return n.source.ChainID() +} + +// ClientType delegates to the DataSource. +func (n *EmbeddedNode) ClientType() string { + return n.source.ClientType() +} + +// IsSynced delegates to the DataSource. +func (n *EmbeddedNode) IsSynced() bool { + return n.source.IsSynced() +} + +// Name returns the configured name for this node. +func (n *EmbeddedNode) Name() string { + return n.name +} diff --git a/pkg/ethereum/execution/embedded_node_test.go b/pkg/ethereum/execution/embedded_node_test.go new file mode 100644 index 0000000..3dfaac0 --- /dev/null +++ b/pkg/ethereum/execution/embedded_node_test.go @@ -0,0 +1,728 @@ +package execution_test + +import ( + "context" + "errors" + "math/big" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/ethpandaops/execution-processor/pkg/ethereum/execution" + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" +) + +// MockBlock implements execution.Block for testing. +type MockBlock struct { + number *big.Int + hash execution.Hash + parentHash execution.Hash + baseFee *big.Int + txs []execution.Transaction +} + +func (b *MockBlock) Number() *big.Int { return b.number } +func (b *MockBlock) Hash() execution.Hash { return b.hash } +func (b *MockBlock) ParentHash() execution.Hash { return b.parentHash } +func (b *MockBlock) BaseFee() *big.Int { return b.baseFee } +func (b *MockBlock) Transactions() []execution.Transaction { return b.txs } + +// NewMockBlock creates a mock block with the given number. +func NewMockBlock(number *big.Int) *MockBlock { + return &MockBlock{ + number: number, + hash: execution.Hash{}, + parentHash: execution.Hash{}, + baseFee: big.NewInt(1000000000), + txs: []execution.Transaction{}, + } +} + +// MockReceipt implements execution.Receipt for testing. 
+type MockReceipt struct { + status uint64 + txHash execution.Hash + gasUsed uint64 +} + +func (r *MockReceipt) Status() uint64 { return r.status } +func (r *MockReceipt) TxHash() execution.Hash { return r.txHash } +func (r *MockReceipt) GasUsed() uint64 { return r.gasUsed } + +// NewMockReceipt creates a mock receipt with the given status. +func NewMockReceipt(status uint64, gasUsed uint64) *MockReceipt { + return &MockReceipt{ + status: status, + txHash: execution.Hash{}, + gasUsed: gasUsed, + } +} + +// MockDataSource implements execution.DataSource for testing. +type MockDataSource struct { + mock.Mock +} + +func (m *MockDataSource) BlockNumber(ctx context.Context) (*uint64, error) { + args := m.Called(ctx) + + if args.Get(0) == nil { + return nil, args.Error(1) + } + + val, ok := args.Get(0).(*uint64) + if !ok { + return nil, args.Error(1) + } + + return val, args.Error(1) +} + +func (m *MockDataSource) BlockByNumber(ctx context.Context, number *big.Int) (execution.Block, error) { + args := m.Called(ctx, number) + + if args.Get(0) == nil { + return nil, args.Error(1) + } + + val, ok := args.Get(0).(execution.Block) + if !ok { + return nil, args.Error(1) + } + + return val, args.Error(1) +} + +func (m *MockDataSource) BlockReceipts(ctx context.Context, number *big.Int) ([]execution.Receipt, error) { + args := m.Called(ctx, number) + + if args.Get(0) == nil { + return nil, args.Error(1) + } + + val, ok := args.Get(0).([]execution.Receipt) + if !ok { + return nil, args.Error(1) + } + + return val, args.Error(1) +} + +func (m *MockDataSource) TransactionReceipt(ctx context.Context, hash string) (execution.Receipt, error) { + args := m.Called(ctx, hash) + + if args.Get(0) == nil { + return nil, args.Error(1) + } + + val, ok := args.Get(0).(execution.Receipt) + if !ok { + return nil, args.Error(1) + } + + return val, args.Error(1) +} + +func (m *MockDataSource) DebugTraceTransaction( + ctx context.Context, + hash string, + blockNumber *big.Int, + opts execution.TraceOptions, +) (*execution.TraceTransaction, error) { + args := m.Called(ctx, hash, blockNumber, opts) + + if args.Get(0) == nil { + return nil, args.Error(1) + } + + val, ok := args.Get(0).(*execution.TraceTransaction) + if !ok { + return nil, args.Error(1) + } + + return val, args.Error(1) +} + +func (m *MockDataSource) ChainID() int64 { + args := m.Called() + + val, ok := args.Get(0).(int64) + if !ok { + return 0 + } + + return val +} + +func (m *MockDataSource) ClientType() string { + args := m.Called() + + return args.String(0) +} + +func (m *MockDataSource) IsSynced() bool { + args := m.Called() + + return args.Bool(0) +} + +func (m *MockDataSource) BlocksByNumbers(ctx context.Context, numbers []*big.Int) ([]execution.Block, error) { + args := m.Called(ctx, numbers) + + if args.Get(0) == nil { + return nil, args.Error(1) + } + + val, ok := args.Get(0).([]execution.Block) + if !ok { + return nil, args.Error(1) + } + + return val, args.Error(1) +} + +func TestEmbeddedNode_Creation(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ds := new(MockDataSource) + node := execution.NewEmbeddedNode(log, "test-node", ds) + + require.NotNil(t, node) + assert.Equal(t, "test-node", node.Name()) + assert.False(t, node.IsReady()) +} + +func TestEmbeddedNode_Start_NoOp(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ds := new(MockDataSource) + node := execution.NewEmbeddedNode(log, "test-node", ds) + + ctx := context.Background() + err := node.Start(ctx) + + assert.NoError(t, err) + // 
Start should not mark the node as ready + assert.False(t, node.IsReady()) +} + +func TestEmbeddedNode_Stop_NoOp(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ds := new(MockDataSource) + node := execution.NewEmbeddedNode(log, "test-node", ds) + + ctx := context.Background() + + // Start and mark ready first + err := node.Start(ctx) + require.NoError(t, err) + + err = node.MarkReady(ctx) + require.NoError(t, err) + + // Stop should complete without error + err = node.Stop(ctx) + assert.NoError(t, err) +} + +func TestEmbeddedNode_MarkReady_ExecutesCallbacks(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ds := new(MockDataSource) + node := execution.NewEmbeddedNode(log, "test-node", ds) + + ctx := context.Background() + + // Track callback execution order + var order []int + + var mu sync.Mutex + + node.OnReady(ctx, func(_ context.Context) error { + mu.Lock() + defer mu.Unlock() + + order = append(order, 1) + + return nil + }) + + node.OnReady(ctx, func(_ context.Context) error { + mu.Lock() + defer mu.Unlock() + + order = append(order, 2) + + return nil + }) + + node.OnReady(ctx, func(_ context.Context) error { + mu.Lock() + defer mu.Unlock() + + order = append(order, 3) + + return nil + }) + + assert.False(t, node.IsReady()) + + err := node.MarkReady(ctx) + require.NoError(t, err) + + assert.True(t, node.IsReady()) + assert.Equal(t, []int{1, 2, 3}, order) +} + +func TestEmbeddedNode_MarkReady_CallbackError(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ds := new(MockDataSource) + node := execution.NewEmbeddedNode(log, "test-node", ds) + + ctx := context.Background() + + expectedErr := errors.New("callback failed") + + var callbacksCalled int + + node.OnReady(ctx, func(_ context.Context) error { + callbacksCalled++ + + return nil + }) + + node.OnReady(ctx, func(_ context.Context) error { + callbacksCalled++ + + return expectedErr + }) + + node.OnReady(ctx, func(_ context.Context) error { + callbacksCalled++ + + return nil + }) + + err := node.MarkReady(ctx) + assert.ErrorIs(t, err, expectedErr) + // Only first two callbacks should have been called (second one failed) + assert.Equal(t, 2, callbacksCalled) +} + +func TestEmbeddedNode_OnReady_MultipleCallbacks(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ds := new(MockDataSource) + node := execution.NewEmbeddedNode(log, "test-node", ds) + + ctx := context.Background() + + var count atomic.Int32 + + const numCallbacks = 10 + for i := 0; i < numCallbacks; i++ { + node.OnReady(ctx, func(_ context.Context) error { + count.Add(1) + + return nil + }) + } + + err := node.MarkReady(ctx) + require.NoError(t, err) + + assert.Equal(t, int32(numCallbacks), count.Load()) +} + +func TestEmbeddedNode_IsReady_States(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ds := new(MockDataSource) + node := execution.NewEmbeddedNode(log, "test-node", ds) + + ctx := context.Background() + + // Initially not ready + assert.False(t, node.IsReady()) + + // Still not ready after Start + err := node.Start(ctx) + require.NoError(t, err) + + assert.False(t, node.IsReady()) + + // Ready after MarkReady + err = node.MarkReady(ctx) + require.NoError(t, err) + + assert.True(t, node.IsReady()) + + // Still ready after Stop + err = node.Stop(ctx) + require.NoError(t, err) + + assert.True(t, node.IsReady()) +} + +func TestEmbeddedNode_DelegatesToDataSource_BlockNumber(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + 
+ ds := new(MockDataSource) + node := execution.NewEmbeddedNode(log, "test-node", ds) + + ctx := context.Background() + expectedBlock := uint64(12345) + + ds.On("BlockNumber", ctx).Return(&expectedBlock, nil) + + result, err := node.BlockNumber(ctx) + require.NoError(t, err) + require.NotNil(t, result) + assert.Equal(t, expectedBlock, *result) + + ds.AssertExpectations(t) +} + +func TestEmbeddedNode_DelegatesToDataSource_BlockByNumber(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ds := new(MockDataSource) + node := execution.NewEmbeddedNode(log, "test-node", ds) + + ctx := context.Background() + blockNum := big.NewInt(12345) + expectedBlock := NewMockBlock(blockNum) + + ds.On("BlockByNumber", ctx, blockNum).Return(expectedBlock, nil) + + result, err := node.BlockByNumber(ctx, blockNum) + require.NoError(t, err) + assert.Equal(t, expectedBlock.Number(), result.Number()) + + ds.AssertExpectations(t) +} + +func TestEmbeddedNode_DelegatesToDataSource_BlockReceipts(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ds := new(MockDataSource) + node := execution.NewEmbeddedNode(log, "test-node", ds) + + ctx := context.Background() + blockNum := big.NewInt(12345) + expectedReceipts := []execution.Receipt{ + NewMockReceipt(1, 21000), + NewMockReceipt(0, 50000), + } + + ds.On("BlockReceipts", ctx, blockNum).Return(expectedReceipts, nil) + + result, err := node.BlockReceipts(ctx, blockNum) + require.NoError(t, err) + assert.Len(t, result, 2) + + ds.AssertExpectations(t) +} + +func TestEmbeddedNode_DelegatesToDataSource_TransactionReceipt(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ds := new(MockDataSource) + node := execution.NewEmbeddedNode(log, "test-node", ds) + + ctx := context.Background() + txHash := "0x1234567890abcdef" + expectedReceipt := NewMockReceipt(1, 21000) + + ds.On("TransactionReceipt", ctx, txHash).Return(expectedReceipt, nil) + + result, err := node.TransactionReceipt(ctx, txHash) + require.NoError(t, err) + assert.Equal(t, uint64(1), result.Status()) + assert.Equal(t, uint64(21000), result.GasUsed()) + + ds.AssertExpectations(t) +} + +func TestEmbeddedNode_DelegatesToDataSource_DebugTraceTransaction(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ds := new(MockDataSource) + node := execution.NewEmbeddedNode(log, "test-node", ds) + + ctx := context.Background() + txHash := "0x1234567890abcdef" + blockNum := big.NewInt(12345) + opts := execution.DefaultTraceOptions() + expectedTrace := &execution.TraceTransaction{ + Gas: 21000, + Failed: false, + } + + ds.On("DebugTraceTransaction", ctx, txHash, blockNum, opts).Return(expectedTrace, nil) + + result, err := node.DebugTraceTransaction(ctx, txHash, blockNum, opts) + require.NoError(t, err) + assert.Equal(t, expectedTrace, result) + + ds.AssertExpectations(t) +} + +func TestEmbeddedNode_DelegatesToDataSource_ClientType(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ds := new(MockDataSource) + node := execution.NewEmbeddedNode(log, "test-node", ds) + + ds.On("ClientType").Return("geth/1.10.0") + + result := node.ClientType() + assert.Equal(t, "geth/1.10.0", result) + + ds.AssertExpectations(t) +} + +func TestEmbeddedNode_DelegatesToDataSource_IsSynced(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ds := new(MockDataSource) + node := execution.NewEmbeddedNode(log, "test-node", ds) + + ds.On("IsSynced").Return(true) + + result := node.IsSynced() + assert.True(t, result) 
+ + ds.AssertExpectations(t) +} + +func TestEmbeddedNode_Name(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + testCases := []struct { + name string + expectedName string + }{ + {name: "simple-name", expectedName: "simple-name"}, + {name: "with-numbers-123", expectedName: "with-numbers-123"}, + {name: "embedded-erigon", expectedName: "embedded-erigon"}, + {name: "", expectedName: ""}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + ds := new(MockDataSource) + node := execution.NewEmbeddedNode(log, tc.name, ds) + + assert.Equal(t, tc.expectedName, node.Name()) + }) + } +} + +func TestEmbeddedNode_ConcurrentMarkReady(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ds := new(MockDataSource) + node := execution.NewEmbeddedNode(log, "test-node", ds) + + ctx := context.Background() + + var callbackCount atomic.Int32 + + node.OnReady(ctx, func(_ context.Context) error { + callbackCount.Add(1) + + return nil + }) + + // Start multiple goroutines calling MarkReady concurrently + const numGoroutines = 10 + + var wg sync.WaitGroup + + for i := 0; i < numGoroutines; i++ { + wg.Add(1) + + go func() { + defer wg.Done() + // Ignore errors - only the first MarkReady should execute callbacks + _ = node.MarkReady(ctx) + }() + } + + wg.Wait() + + // Node should be ready + assert.True(t, node.IsReady()) + + // Callback should have been called at least once + // (implementation may allow multiple calls, but at least one should succeed) + assert.GreaterOrEqual(t, callbackCount.Load(), int32(1)) +} + +func TestEmbeddedNode_ConcurrentOnReady(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ds := new(MockDataSource) + node := execution.NewEmbeddedNode(log, "test-node", ds) + + ctx := context.Background() + + var callbackCount atomic.Int32 + + // Register callbacks concurrently + const numGoroutines = 10 + + var wg sync.WaitGroup + + for i := 0; i < numGoroutines; i++ { + wg.Add(1) + + go func() { + defer wg.Done() + + node.OnReady(ctx, func(_ context.Context) error { + callbackCount.Add(1) + + return nil + }) + }() + } + + wg.Wait() + + // Now mark ready and verify all callbacks execute + err := node.MarkReady(ctx) + require.NoError(t, err) + + assert.Equal(t, int32(numGoroutines), callbackCount.Load()) +} + +func TestEmbeddedNode_InterfaceCompliance(t *testing.T) { + // Compile-time check that EmbeddedNode implements Node interface + var _ execution.Node = (*execution.EmbeddedNode)(nil) + + // Create an actual instance and verify it can be used as Node + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ds := new(MockDataSource) + + var node execution.Node = execution.NewEmbeddedNode(log, "test-node", ds) + + require.NotNil(t, node) + assert.Equal(t, "test-node", node.Name()) +} + +func TestEmbeddedNode_DataSourceError(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ds := new(MockDataSource) + node := execution.NewEmbeddedNode(log, "test-node", ds) + + ctx := context.Background() + expectedErr := errors.New("data source error") + + ds.On("BlockNumber", ctx).Return(nil, expectedErr) + + result, err := node.BlockNumber(ctx) + assert.ErrorIs(t, err, expectedErr) + assert.Nil(t, result) + + ds.AssertExpectations(t) +} + +func TestEmbeddedNode_ContextCancellation(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ds := new(MockDataSource) + node := execution.NewEmbeddedNode(log, "test-node", ds) + + ctx, cancel := 
context.WithCancel(context.Background()) + cancel() // Cancel immediately + + // DataSource should receive cancelled context + ds.On("BlockNumber", ctx).Return(nil, ctx.Err()) + + result, err := node.BlockNumber(ctx) + assert.Error(t, err) + assert.Nil(t, result) +} + +func TestEmbeddedNode_CallbackWithContext(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ds := new(MockDataSource) + node := execution.NewEmbeddedNode(log, "test-node", ds) + + ctx := context.Background() + + var receivedCtx context.Context + + node.OnReady(ctx, func(cbCtx context.Context) error { + receivedCtx = cbCtx + + return nil + }) + + err := node.MarkReady(ctx) + require.NoError(t, err) + + // The callback should receive the context passed to MarkReady + assert.Equal(t, ctx, receivedCtx) +} + +func TestEmbeddedNode_CallbackWithTimeout(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ds := new(MockDataSource) + node := execution.NewEmbeddedNode(log, "test-node", ds) + + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel() + + callbackExecuted := make(chan struct{}) + + node.OnReady(ctx, func(_ context.Context) error { + close(callbackExecuted) + + return nil + }) + + err := node.MarkReady(ctx) + require.NoError(t, err) + + select { + case <-callbackExecuted: + // Success + case <-time.After(1 * time.Second): + t.Fatal("callback did not execute") + } +} diff --git a/pkg/ethereum/execution/geth/adapter.go b/pkg/ethereum/execution/geth/adapter.go new file mode 100644 index 0000000..447ad76 --- /dev/null +++ b/pkg/ethereum/execution/geth/adapter.go @@ -0,0 +1,227 @@ +//go:build !embedded + +// Package geth provides go-ethereum adapters for the execution interfaces. +// This package contains all go-ethereum dependencies, allowing the core +// execution package to remain free of CGO-dependent imports. +package geth + +import ( + "math/big" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + + "github.com/ethpandaops/execution-processor/pkg/ethereum/execution" +) + +// Compile-time interface checks. +var ( + _ execution.Block = (*BlockAdapter)(nil) + _ execution.Transaction = (*TransactionAdapter)(nil) + _ execution.Receipt = (*ReceiptAdapter)(nil) +) + +// BlockAdapter wraps a go-ethereum Block to implement execution.Block. +type BlockAdapter struct { + block *types.Block + txs []execution.Transaction +} + +// NewBlockAdapter creates a new BlockAdapter from a go-ethereum Block. +// It extracts sender addresses for all transactions using the appropriate signer. +func NewBlockAdapter(block *types.Block) *BlockAdapter { + gethTxs := block.Transactions() + txs := make([]execution.Transaction, len(gethTxs)) + + for i, tx := range gethTxs { + txs[i] = NewTransactionAdapter(tx) + } + + return &BlockAdapter{ + block: block, + txs: txs, + } +} + +// Number returns the block number. +func (b *BlockAdapter) Number() *big.Int { + return b.block.Number() +} + +// Hash returns the block hash. +func (b *BlockAdapter) Hash() execution.Hash { + return execution.Hash(b.block.Hash()) +} + +// ParentHash returns the parent block hash. +func (b *BlockAdapter) ParentHash() execution.Hash { + return execution.Hash(b.block.ParentHash()) +} + +// BaseFee returns the base fee per gas (EIP-1559), or nil for pre-London blocks. +func (b *BlockAdapter) BaseFee() *big.Int { + return b.block.BaseFee() +} + +// Transactions returns all transactions in the block. 
+func (b *BlockAdapter) Transactions() []execution.Transaction { + return b.txs +} + +// TransactionAdapter wraps a go-ethereum Transaction to implement execution.Transaction. +type TransactionAdapter struct { + tx *types.Transaction + from common.Address +} + +// NewTransactionAdapter creates a new TransactionAdapter from a go-ethereum Transaction. +// It computes the sender address using the appropriate signer. +func NewTransactionAdapter(tx *types.Transaction) *TransactionAdapter { + // Determine the appropriate signer for extracting the sender + var signer types.Signer + + chainID := tx.ChainId() + if chainID == nil || chainID.Sign() == 0 { + // Legacy transaction without EIP-155 replay protection + signer = types.HomesteadSigner{} + } else { + signer = types.LatestSignerForChainID(chainID) + } + + // Extract sender - this uses go-ethereum's crypto package internally + from, _ := types.Sender(signer, tx) + + return &TransactionAdapter{ + tx: tx, + from: from, + } +} + +// Hash returns the transaction hash. +func (t *TransactionAdapter) Hash() execution.Hash { + return execution.Hash(t.tx.Hash()) +} + +// Type returns the transaction type. +func (t *TransactionAdapter) Type() uint8 { + return t.tx.Type() +} + +// To returns the recipient address, or nil for contract creation. +func (t *TransactionAdapter) To() *execution.Address { + if t.tx.To() == nil { + return nil + } + + addr := execution.Address(*t.tx.To()) + + return &addr +} + +// From returns the sender address. +func (t *TransactionAdapter) From() execution.Address { + return execution.Address(t.from) +} + +// Nonce returns the sender account nonce. +func (t *TransactionAdapter) Nonce() uint64 { + return t.tx.Nonce() +} + +// Gas returns the gas limit. +func (t *TransactionAdapter) Gas() uint64 { + return t.tx.Gas() +} + +// GasPrice returns the gas price (for legacy transactions). +func (t *TransactionAdapter) GasPrice() *big.Int { + return t.tx.GasPrice() +} + +// GasTipCap returns the max priority fee per gas (EIP-1559). +func (t *TransactionAdapter) GasTipCap() *big.Int { + return t.tx.GasTipCap() +} + +// GasFeeCap returns the max fee per gas (EIP-1559). +func (t *TransactionAdapter) GasFeeCap() *big.Int { + return t.tx.GasFeeCap() +} + +// Value returns the value transferred in wei. +func (t *TransactionAdapter) Value() *big.Int { + return t.tx.Value() +} + +// Data returns the input data (calldata). +func (t *TransactionAdapter) Data() []byte { + return t.tx.Data() +} + +// Size returns the encoded transaction size in bytes. +func (t *TransactionAdapter) Size() uint64 { + return t.tx.Size() +} + +// ChainId returns the chain ID, or nil for legacy transactions. +func (t *TransactionAdapter) ChainId() *big.Int { + return t.tx.ChainId() +} + +// BlobGas returns the blob gas used (for blob transactions). +func (t *TransactionAdapter) BlobGas() uint64 { + return t.tx.BlobGas() +} + +// BlobGasFeeCap returns the max blob fee per gas (for blob transactions). +func (t *TransactionAdapter) BlobGasFeeCap() *big.Int { + return t.tx.BlobGasFeeCap() +} + +// BlobHashes returns the versioned hashes (for blob transactions). +func (t *TransactionAdapter) BlobHashes() []execution.Hash { + gethHashes := t.tx.BlobHashes() + hashes := make([]execution.Hash, len(gethHashes)) + + for i, h := range gethHashes { + hashes[i] = execution.Hash(h) + } + + return hashes +} + +// ReceiptAdapter wraps a go-ethereum Receipt to implement execution.Receipt. 
+type ReceiptAdapter struct { + receipt *types.Receipt +} + +// NewReceiptAdapter creates a new ReceiptAdapter from a go-ethereum Receipt. +func NewReceiptAdapter(receipt *types.Receipt) *ReceiptAdapter { + return &ReceiptAdapter{receipt: receipt} +} + +// Status returns the transaction status (1=success, 0=failure). +func (r *ReceiptAdapter) Status() uint64 { + return r.receipt.Status +} + +// TxHash returns the transaction hash. +func (r *ReceiptAdapter) TxHash() execution.Hash { + return execution.Hash(r.receipt.TxHash) +} + +// GasUsed returns the gas used by the transaction. +func (r *ReceiptAdapter) GasUsed() uint64 { + return r.receipt.GasUsed +} + +// AdaptReceipts converts a slice of go-ethereum receipts to execution.Receipt interfaces. +func AdaptReceipts(receipts []*types.Receipt) []execution.Receipt { + result := make([]execution.Receipt, len(receipts)) + + for i, r := range receipts { + result[i] = NewReceiptAdapter(r) + } + + return result +} diff --git a/pkg/ethereum/execution/geth/rpc.go b/pkg/ethereum/execution/geth/rpc.go new file mode 100644 index 0000000..8dd9dfa --- /dev/null +++ b/pkg/ethereum/execution/geth/rpc.go @@ -0,0 +1,396 @@ +//go:build !embedded + +package geth + +import ( + "context" + "encoding/hex" + "encoding/json" + "fmt" + "math/big" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/rpc" + + pcommon "github.com/ethpandaops/execution-processor/pkg/common" + "github.com/ethpandaops/execution-processor/pkg/ethereum/execution" +) + +const ( + statusError = "error" + statusSuccess = "success" +) + +func (n *RPCNode) blockNumber(ctx context.Context) (*uint64, error) { + start := time.Now() + + blockNumber, err := n.client.BlockNumber(ctx) + + duration := time.Since(start) + + // Record RPC metrics + status := statusSuccess + if err != nil { + status = statusError + } + + network := n.Metadata().ChainID() + + pcommon.RPCCallDuration.WithLabelValues(fmt.Sprintf("%d", network), n.config.Name, "eth_blockNumber", status).Observe(duration.Seconds()) + pcommon.RPCCallsTotal.WithLabelValues(fmt.Sprintf("%d", network), n.config.Name, "eth_blockNumber", status).Inc() + + if err != nil { + return nil, err + } + + return &blockNumber, nil +} + +func (n *RPCNode) blockByNumber(ctx context.Context, blockNumber *big.Int) (execution.Block, error) { + start := time.Now() + + block, err := n.client.BlockByNumber(ctx, blockNumber) + + duration := time.Since(start) + + // Record RPC metrics + status := statusSuccess + if err != nil { + status = statusError + } + + network := n.Metadata().ChainID() + + pcommon.RPCCallDuration.WithLabelValues(fmt.Sprintf("%d", network), n.config.Name, "eth_getBlockByNumber", status).Observe(duration.Seconds()) + pcommon.RPCCallsTotal.WithLabelValues(fmt.Sprintf("%d", network), n.config.Name, "eth_getBlockByNumber", status).Inc() + + if err != nil { + return nil, err + } + + return NewBlockAdapter(block), nil +} + +// blocksByNumbers fetches multiple blocks using batch RPC calls. +// Returns blocks up to the first not-found (contiguous only). 
+func (n *RPCNode) blocksByNumbers(ctx context.Context, numbers []*big.Int) ([]execution.Block, error) { + if len(numbers) == 0 { + return []execution.Block{}, nil + } + + start := time.Now() + network := n.Metadata().ChainID() + + // Prepare batch calls using json.RawMessage to handle null responses + batch := make([]rpc.BatchElem, len(numbers)) + results := make([]*json.RawMessage, len(numbers)) + + for i, num := range numbers { + results[i] = new(json.RawMessage) + batch[i] = rpc.BatchElem{ + Method: "eth_getBlockByNumber", + Args: []interface{}{toBlockNumArg(num), true}, // true = include transactions + Result: results[i], + } + } + + // Execute batch call + err := n.rpcClient.BatchCallContext(ctx, batch) + + duration := time.Since(start) + + // Record batch RPC metrics + status := statusSuccess + if err != nil { + status = statusError + } + + pcommon.RPCCallDuration.WithLabelValues( + fmt.Sprintf("%d", network), + n.config.Name, + "eth_getBlockByNumber_batch", + status, + ).Observe(duration.Seconds()) + + pcommon.RPCCallsTotal.WithLabelValues( + fmt.Sprintf("%d", network), + n.config.Name, + "eth_getBlockByNumber_batch", + status, + ).Inc() + + if err != nil { + return nil, fmt.Errorf("batch call failed: %w", err) + } + + // Process results, stopping at first not-found (contiguity requirement) + blocks := make([]execution.Block, 0, len(numbers)) + + for i, elem := range batch { + // Check for individual call error - stop at first error for contiguity + // We intentionally don't return this error as we want partial results + if elem.Error != nil { + break + } + + // Check for nil/not-found block (null JSON response) + if results[i] == nil || len(*results[i]) == 0 || string(*results[i]) == "null" { + // Block not found - stop here for contiguity + break + } + + // Parse the block from JSON + block, parseErr := parseBlockFromJSON(*results[i]) + if parseErr != nil { + // Parse error - stop here for contiguity + break + } + + blocks = append(blocks, NewBlockAdapter(block)) + } + + return blocks, nil //nolint:nilerr // Intentionally returning partial results for contiguity +} + +// toBlockNumArg converts a block number to the RPC argument format. +func toBlockNumArg(number *big.Int) string { + if number == nil { + return "latest" + } + + return fmt.Sprintf("0x%x", number) +} + +// parseBlockFromJSON parses a types.Block from JSON-RPC response. +func parseBlockFromJSON(raw json.RawMessage) (*types.Block, error) { + // Use go-ethereum's internal header structure for unmarshaling + var head *types.Header + if err := json.Unmarshal(raw, &head); err != nil { + return nil, fmt.Errorf("failed to unmarshal block header: %w", err) + } + + // Parse transactions separately + var body struct { + Transactions []*types.Transaction `json:"transactions"` + } + + if err := json.Unmarshal(raw, &body); err != nil { + return nil, fmt.Errorf("failed to unmarshal block body: %w", err) + } + + return types.NewBlockWithHeader(head).WithBody(types.Body{Transactions: body.Transactions}), nil +} + +// getTraceParams returns VM trace parameters with configurable options. +func getTraceParams(hash string, options execution.TraceOptions) []any { + return []any{ + hash, + map[string]any{ + "disableStorage": options.DisableStorage, + "disableStack": options.DisableStack, + "disableMemory": options.DisableMemory, + "enableReturnData": options.EnableReturnData, + }, + } +} + +// traceTransactionErigon handles tracing for Erigon clients. 
+func (n *RPCNode) traceTransactionErigon(ctx context.Context, hash string, options execution.TraceOptions) (*execution.TraceTransaction, error) { + var rsp erigonResult + + start := time.Now() + + err := n.rpcClient.CallContext(ctx, &rsp, "debug_traceTransaction", getTraceParams(hash, options)...) + + duration := time.Since(start) + + // Record RPC metrics + status := statusSuccess + if err != nil { + status = statusError + } + + network := n.Metadata().ChainID() + + pcommon.RPCCallDuration.WithLabelValues(fmt.Sprintf("%d", network), n.config.Name, "debug_traceTransaction", status).Observe(duration.Seconds()) + pcommon.RPCCallsTotal.WithLabelValues(fmt.Sprintf("%d", network), n.config.Name, "debug_traceTransaction", status).Inc() + + if err != nil { + return nil, err + } + + returnValue := rsp.ReturnValue + if returnValue != nil && (*returnValue == "" || *returnValue == "0x") { + returnValue = nil + } + + result := &execution.TraceTransaction{ + Gas: rsp.Gas, + Failed: rsp.Failed, + ReturnValue: returnValue, + Structlogs: make([]execution.StructLog, 0, len(rsp.StructLogs)), + } + + // Empty array on transfer + for _, log := range rsp.StructLogs { + var returnData *string + + if log.ReturnData != nil { + returnData = new(string) + *returnData = hex.EncodeToString(log.ReturnData) + } + + result.Structlogs = append(result.Structlogs, execution.StructLog{ + PC: log.PC, + Op: log.Op, + Gas: log.Gas, + GasCost: log.GasCost, + Depth: log.Depth, + ReturnData: returnData, + Refund: log.Refund, + Error: log.Error, + Stack: log.Stack, + }) + } + + // Sanitize gasCost values to fix Erigon's unsigned integer underflow bug. + execution.SanitizeStructLogs(result.Structlogs) + + return result, nil +} + +// blockReceipts fetches all receipts for a block by number (much faster than per-tx). +func (n *RPCNode) blockReceipts(ctx context.Context, blockNumber *big.Int) ([]execution.Receipt, error) { + start := time.Now() + + blockNrOrHash := rpc.BlockNumberOrHashWithNumber(rpc.BlockNumber(blockNumber.Int64())) + + receipts, err := n.client.BlockReceipts(ctx, blockNrOrHash) + + duration := time.Since(start) + + // Record RPC metrics + status := statusSuccess + if err != nil { + status = statusError + } + + network := n.Metadata().ChainID() + + pcommon.RPCCallDuration.WithLabelValues( + fmt.Sprintf("%d", network), + n.config.Name, + "eth_getBlockReceipts", + status, + ).Observe(duration.Seconds()) + + pcommon.RPCCallsTotal.WithLabelValues( + fmt.Sprintf("%d", network), + n.config.Name, + "eth_getBlockReceipts", + status, + ).Inc() + + if err != nil { + return nil, err + } + + return AdaptReceipts(receipts), nil +} + +// transactionReceipt fetches the receipt for a transaction by hash. 
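To show why the block-level receipt call matters, here is a hedged sketch of a hypothetical `totalGasUsed` helper that needs one `BlockReceipts` call per block instead of one `TransactionReceipt` call per transaction.

```go
package consumer

import (
	"context"
	"math/big"

	"github.com/ethpandaops/execution-processor/pkg/ethereum/execution"
)

// totalGasUsed is a hypothetical helper: a single block-level receipts call
// replaces N per-transaction receipt lookups for the same information.
func totalGasUsed(ctx context.Context, node execution.Node, number uint64) (uint64, error) {
	receipts, err := node.BlockReceipts(ctx, new(big.Int).SetUint64(number))
	if err != nil {
		return 0, err
	}

	var total uint64
	for _, r := range receipts {
		total += r.GasUsed()
	}

	return total, nil
}
```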
+func (n *RPCNode) transactionReceipt(ctx context.Context, hash string) (execution.Receipt, error) { + start := time.Now() + + txHash := common.HexToHash(hash) + + receipt, err := n.client.TransactionReceipt(ctx, txHash) + + duration := time.Since(start) + + // Record RPC metrics + status := statusSuccess + if err != nil { + status = statusError + } + + network := n.Metadata().ChainID() + + pcommon.RPCCallDuration.WithLabelValues( + fmt.Sprintf("%d", network), + n.config.Name, + "eth_getTransactionReceipt", + status, + ).Observe(duration.Seconds()) + + pcommon.RPCCallsTotal.WithLabelValues( + fmt.Sprintf("%d", network), + n.config.Name, + "eth_getTransactionReceipt", + status, + ).Inc() + + if err != nil { + return nil, err + } + + return NewReceiptAdapter(receipt), nil +} + +// debugTraceTransaction traces a transaction execution using the client's debug API. +func (n *RPCNode) debugTraceTransaction( + ctx context.Context, + hash string, + _ *big.Int, + options execution.TraceOptions, +) (*execution.TraceTransaction, error) { + // Add a timeout if the context doesn't already have one + if _, hasDeadline := ctx.Deadline(); !hasDeadline { + var cancel context.CancelFunc + + ctx, cancel = context.WithTimeout(ctx, 60*time.Second) + + defer cancel() + } + + client := n.Metadata().Client(ctx) + + switch client { + case "geth": + return nil, fmt.Errorf("geth is not supported") + case "nethermind": + return nil, fmt.Errorf("nethermind is not supported") + case "besu": + return nil, fmt.Errorf("besu is not supported") + case "reth": + return nil, fmt.Errorf("reth is not supported") + case "erigon": + return n.traceTransactionErigon(ctx, hash, options) + default: + // Default to Erigon format if client is unknown + return n.traceTransactionErigon(ctx, hash, options) + } +} + +// erigonResult represents the result from an Erigon debug_traceTransaction call. +type erigonResult struct { + Gas uint64 `json:"gas"` + Failed bool `json:"failed"` + ReturnValue *string `json:"returnValue"` + StructLogs []erigonStructLog `json:"structLogs"` +} + +// erigonStructLog represents a single structlog entry from Erigon. +type erigonStructLog struct { + PC uint32 `json:"pc"` + Op string `json:"op"` + Gas uint64 `json:"gas"` + GasCost uint64 `json:"gasCost"` + Depth uint64 `json:"depth"` + ReturnData []byte `json:"returnData"` + Refund *uint64 `json:"refund"` + Error *string `json:"error"` + Stack *[]string `json:"stack"` +} diff --git a/pkg/ethereum/execution/node.go b/pkg/ethereum/execution/geth/rpc_node.go similarity index 66% rename from pkg/ethereum/execution/node.go rename to pkg/ethereum/execution/geth/rpc_node.go index d070193..6c8e469 100644 --- a/pkg/ethereum/execution/node.go +++ b/pkg/ethereum/execution/geth/rpc_node.go @@ -1,9 +1,12 @@ -package execution +//go:build !embedded + +package geth import ( "context" "errors" "fmt" + "math/big" "net" "net/http" "sync" @@ -11,10 +14,15 @@ import ( "github.com/ethereum/go-ethereum/ethclient" "github.com/ethereum/go-ethereum/rpc" - "github.com/ethpandaops/execution-processor/pkg/ethereum/execution/services" "github.com/sirupsen/logrus" + + "github.com/ethpandaops/execution-processor/pkg/ethereum/execution" + "github.com/ethpandaops/execution-processor/pkg/ethereum/execution/geth/services" ) +// Compile-time check that RPCNode implements execution.Node interface. +var _ execution.Node = (*RPCNode)(nil) + // headerTransport adds custom headers to requests and respects context cancellation. 
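A hedged usage sketch for the trace entry point above; `traceWithMinimalPayload` is hypothetical, and the option values shown are illustrative rather than recommended defaults.

```go
package consumer

import (
	"context"

	"github.com/ethpandaops/execution-processor/pkg/ethereum/execution"
)

// traceWithMinimalPayload is a hypothetical helper that disables storage and
// memory capture to shrink the trace. If the supplied context has no deadline,
// the RPC implementation above applies its own 60s timeout.
func traceWithMinimalPayload(ctx context.Context, node execution.Node, txHash string) (*execution.TraceTransaction, error) {
	opts := execution.TraceOptions{
		DisableStorage:   true,
		DisableMemory:    true,
		DisableStack:     false, // the stack is still needed to extract CALL targets in RPC mode
		EnableReturnData: false,
	}

	// The RPC implementation above ignores the block number argument, so nil
	// is acceptable here.
	return node.DebugTraceTransaction(ctx, txHash, nil, opts)
}
```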
type headerTransport struct { headers map[string]string @@ -36,8 +44,9 @@ func (t *headerTransport) RoundTrip(req *http.Request) (*http.Response, error) { return t.base.RoundTrip(req) } -type Node struct { - config *Config +// RPCNode implements execution.Node using JSON-RPC connections. +type RPCNode struct { + config *execution.Config log logrus.FieldLogger client *ethclient.Client rpcClient *rpc.Client @@ -53,19 +62,20 @@ type Node struct { cancel context.CancelFunc } -func NewNode(log logrus.FieldLogger, conf *Config) *Node { - return &Node{ +// NewRPCNode creates a new RPC-based execution node. +func NewRPCNode(log logrus.FieldLogger, conf *execution.Config) *RPCNode { + return &RPCNode{ config: conf, log: log.WithFields(logrus.Fields{"type": "execution", "source": conf.Name}), services: []services.Service{}, } } -func (n *Node) OnReady(_ context.Context, callback func(ctx context.Context) error) { +func (n *RPCNode) OnReady(_ context.Context, callback func(ctx context.Context) error) { n.onReadyCallbacks = append(n.onReadyCallbacks, callback) } -func (n *Node) Start(ctx context.Context) error { +func (n *RPCNode) Start(ctx context.Context) error { n.log.WithFields(logrus.Fields{ "node_name": n.name, }).Info("Starting execution node") @@ -193,7 +203,7 @@ func (n *Node) Start(ctx context.Context) error { return nil } -func (n *Node) Stop(ctx context.Context) error { +func (n *RPCNode) Stop(ctx context.Context) error { n.log.Info("Stopping execution node") // Cancel the node context to signal all goroutines to stop @@ -230,7 +240,7 @@ func (n *Node) Stop(ctx context.Context) error { return nil } -func (n *Node) getServiceByName(name services.Name) (services.Service, error) { +func (n *RPCNode) getServiceByName(name services.Name) (services.Service, error) { for _, service := range n.services { if service.Name() == name { return service, nil @@ -240,7 +250,8 @@ func (n *Node) getServiceByName(name services.Name) (services.Service, error) { return nil, errors.New("service not found") } -func (n *Node) Metadata() *services.MetadataService { +// Metadata returns the metadata service for this node. +func (n *RPCNode) Metadata() *services.MetadataService { service, err := n.getServiceByName("metadata") if err != nil { // This should never happen. If it does, good luck. @@ -255,6 +266,70 @@ func (n *Node) Metadata() *services.MetadataService { return svc } -func (n *Node) Name() string { +// Name returns the configured name for this node. +func (n *RPCNode) Name() string { return n.config.Name } + +// ChainID returns the chain ID from the metadata service. +func (n *RPCNode) ChainID() int64 { + if meta := n.Metadata(); meta != nil { + return meta.ChainID() + } + + return 0 +} + +// ClientType returns the client type from the metadata service. +func (n *RPCNode) ClientType() string { + if meta := n.Metadata(); meta != nil { + return meta.ClientVersion() + } + + return "" +} + +// IsSynced returns true if the node is synced. +func (n *RPCNode) IsSynced() bool { + if meta := n.Metadata(); meta != nil { + return meta.IsSynced() + } + + return false +} + +// BlockNumber returns the current block number. +func (n *RPCNode) BlockNumber(ctx context.Context) (*uint64, error) { + return n.blockNumber(ctx) +} + +// BlockByNumber returns the block at the given number. +func (n *RPCNode) BlockByNumber(ctx context.Context, number *big.Int) (execution.Block, error) { + return n.blockByNumber(ctx, number) +} + +// BlocksByNumbers returns blocks at the given numbers using batch RPC. 
+// Returns blocks up to the first not-found (contiguous only). +func (n *RPCNode) BlocksByNumbers(ctx context.Context, numbers []*big.Int) ([]execution.Block, error) { + return n.blocksByNumbers(ctx, numbers) +} + +// BlockReceipts returns all receipts for the block at the given number. +func (n *RPCNode) BlockReceipts(ctx context.Context, number *big.Int) ([]execution.Receipt, error) { + return n.blockReceipts(ctx, number) +} + +// TransactionReceipt returns the receipt for the transaction with the given hash. +func (n *RPCNode) TransactionReceipt(ctx context.Context, hash string) (execution.Receipt, error) { + return n.transactionReceipt(ctx, hash) +} + +// DebugTraceTransaction returns the execution trace for the transaction. +func (n *RPCNode) DebugTraceTransaction( + ctx context.Context, + hash string, + blockNumber *big.Int, + opts execution.TraceOptions, +) (*execution.TraceTransaction, error) { + return n.debugTraceTransaction(ctx, hash, blockNumber, opts) +} diff --git a/pkg/ethereum/execution/services/client.go b/pkg/ethereum/execution/geth/services/client.go similarity index 97% rename from pkg/ethereum/execution/services/client.go rename to pkg/ethereum/execution/geth/services/client.go index 729c8e5..eb00df0 100644 --- a/pkg/ethereum/execution/services/client.go +++ b/pkg/ethereum/execution/geth/services/client.go @@ -1,3 +1,5 @@ +//go:build !embedded + package services import ( diff --git a/pkg/ethereum/execution/services/metadata.go b/pkg/ethereum/execution/geth/services/metadata.go similarity index 99% rename from pkg/ethereum/execution/services/metadata.go rename to pkg/ethereum/execution/geth/services/metadata.go index 353a1f9..d0314ae 100644 --- a/pkg/ethereum/execution/services/metadata.go +++ b/pkg/ethereum/execution/geth/services/metadata.go @@ -1,3 +1,5 @@ +//go:build !embedded + package services import ( diff --git a/pkg/ethereum/execution/services/service.go b/pkg/ethereum/execution/geth/services/service.go similarity index 92% rename from pkg/ethereum/execution/services/service.go rename to pkg/ethereum/execution/geth/services/service.go index 7048e47..3a103fa 100644 --- a/pkg/ethereum/execution/services/service.go +++ b/pkg/ethereum/execution/geth/services/service.go @@ -1,3 +1,5 @@ +//go:build !embedded + package services import "context" diff --git a/pkg/ethereum/execution/interface.go b/pkg/ethereum/execution/interface.go new file mode 100644 index 0000000..06423f0 --- /dev/null +++ b/pkg/ethereum/execution/interface.go @@ -0,0 +1,69 @@ +package execution + +import ( + "context" + "math/big" +) + +// Node defines the interface for execution data providers. +// +// Implementations include: +// - geth.RPCNode: connects to execution clients via JSON-RPC over HTTP +// - EmbeddedNode: receives data directly from host application via DataSource +// +// All methods must be safe for concurrent use by multiple goroutines. +// +// Lifecycle: +// 1. Create node with appropriate constructor (geth.NewRPCNode or NewEmbeddedNode) +// 2. Register OnReady callbacks before calling Start +// 3. Call Start to begin initialization +// 4. Node signals readiness by executing OnReady callbacks +// 5. Call Stop for graceful shutdown +type Node interface { + // Start initializes the node and begins any background operations. + // For RPCNode, this establishes the RPC connection and starts health monitoring. + // For EmbeddedNode, this is a no-op as the host controls the DataSource lifecycle. 
+ Start(ctx context.Context) error + + // Stop gracefully shuts down the node and releases resources. + // Should be called when the node is no longer needed. + Stop(ctx context.Context) error + + // OnReady registers a callback to be invoked when the node becomes ready. + // For RPCNode, callbacks execute when the RPC connection is healthy. + // For EmbeddedNode, callbacks execute when MarkReady is called by the host. + // Multiple callbacks can be registered and will execute in registration order. + OnReady(ctx context.Context, callback func(ctx context.Context) error) + + // BlockNumber returns the current block number from the execution client. + BlockNumber(ctx context.Context) (*uint64, error) + + // BlockByNumber returns the block at the given number. + BlockByNumber(ctx context.Context, number *big.Int) (Block, error) + + // BlocksByNumbers returns blocks at the given numbers using batch RPC. + // Returns blocks up to the first not-found (contiguous only). + // If a block is not found, the returned slice contains all blocks before that point. + BlocksByNumbers(ctx context.Context, numbers []*big.Int) ([]Block, error) + + // BlockReceipts returns all receipts for the block at the given number. + BlockReceipts(ctx context.Context, number *big.Int) ([]Receipt, error) + + // TransactionReceipt returns the receipt for the transaction with the given hash. + TransactionReceipt(ctx context.Context, hash string) (Receipt, error) + + // DebugTraceTransaction returns the execution trace for the transaction. + DebugTraceTransaction(ctx context.Context, hash string, blockNumber *big.Int, opts TraceOptions) (*TraceTransaction, error) + + // ChainID returns the chain ID reported by the execution client. + ChainID() int64 + + // ClientType returns the client type/version string (e.g., "geth/1.10.0"). + ClientType() string + + // IsSynced returns true if the execution client is fully synced. + IsSynced() bool + + // Name returns the configured name for this node. 
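The lifecycle described above (register callbacks, then Start, then Stop) can be driven generically against the interface. A minimal sketch, assuming a hypothetical `runNode` driver:

```go
package consumer

import (
	"context"

	"github.com/sirupsen/logrus"

	"github.com/ethpandaops/execution-processor/pkg/ethereum/execution"
)

// runNode is a hypothetical driver that follows the documented lifecycle:
// OnReady callbacks are registered before Start, and Stop is called on shutdown.
func runNode(ctx context.Context, log logrus.FieldLogger, node execution.Node) error {
	node.OnReady(ctx, func(ctx context.Context) error {
		head, err := node.BlockNumber(ctx)
		if err != nil {
			return err
		}

		log.WithField("node", node.Name()).WithField("head", *head).Info("node ready")

		return nil
	})

	if err := node.Start(ctx); err != nil {
		return err
	}

	<-ctx.Done()

	return node.Stop(context.Background())
}
```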
+ Name() string +} diff --git a/pkg/ethereum/execution/rpc.go b/pkg/ethereum/execution/rpc.go deleted file mode 100644 index 638cb84..0000000 --- a/pkg/ethereum/execution/rpc.go +++ /dev/null @@ -1,253 +0,0 @@ -package execution - -import ( - "context" - "encoding/hex" - "fmt" - "math/big" - "time" - - "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/core/types" - "github.com/ethereum/go-ethereum/rpc" - - pcommon "github.com/ethpandaops/execution-processor/pkg/common" -) - -const ( - STATUS_ERROR = "error" - STATUS_SUCCESS = "success" -) - -func (n *Node) BlockNumber(ctx context.Context) (*uint64, error) { - start := time.Now() - - blockNumber, err := n.client.BlockNumber(ctx) - - duration := time.Since(start) - - // Record RPC metrics - status := STATUS_SUCCESS - if err != nil { - status = STATUS_ERROR - } - - network := n.Metadata().ChainID() - - pcommon.RPCCallDuration.WithLabelValues(fmt.Sprintf("%d", network), n.config.Name, "eth_blockNumber", status).Observe(duration.Seconds()) - pcommon.RPCCallsTotal.WithLabelValues(fmt.Sprintf("%d", network), n.config.Name, "eth_blockNumber", status).Inc() - - if err != nil { - return nil, err - } - - return &blockNumber, nil -} - -func (n *Node) BlockByNumber(ctx context.Context, blockNumber *big.Int) (*types.Block, error) { - start := time.Now() - - block, err := n.client.BlockByNumber(ctx, blockNumber) - - duration := time.Since(start) - - // Record RPC metrics - status := STATUS_SUCCESS - if err != nil { - status = STATUS_ERROR - } - - network := n.Metadata().ChainID() - - pcommon.RPCCallDuration.WithLabelValues(fmt.Sprintf("%d", network), n.config.Name, "eth_getBlockByNumber", status).Observe(duration.Seconds()) - pcommon.RPCCallsTotal.WithLabelValues(fmt.Sprintf("%d", network), n.config.Name, "eth_getBlockByNumber", status).Inc() - - if err != nil { - return nil, err - } - - return block, nil -} - -// getTraceParams returns VM trace parameters with configurable options. -func getTraceParams(hash string, options TraceOptions) []any { - return []any{ - hash, - map[string]any{ - "disableStorage": options.DisableStorage, - "disableStack": options.DisableStack, - "disableMemory": options.DisableMemory, - "enableReturnData": options.EnableReturnData, - }, - } -} - -// traceTransactionErigon handles tracing for Erigon clients. -func (n *Node) traceTransactionErigon(ctx context.Context, hash string, options TraceOptions) (*TraceTransaction, error) { - var rsp ErigonResult - - start := time.Now() - - err := n.rpcClient.CallContext(ctx, &rsp, "debug_traceTransaction", getTraceParams(hash, options)...) 
- - duration := time.Since(start) - - // Record RPC metrics - status := STATUS_SUCCESS - if err != nil { - status = STATUS_ERROR - } - - network := n.Metadata().ChainID() - - pcommon.RPCCallDuration.WithLabelValues(fmt.Sprintf("%d", network), n.config.Name, "debug_traceTransaction", status).Observe(duration.Seconds()) - pcommon.RPCCallsTotal.WithLabelValues(fmt.Sprintf("%d", network), n.config.Name, "debug_traceTransaction", status).Inc() - - if err != nil { - return nil, err - } - - returnValue := rsp.ReturnValue - if returnValue != nil && (*returnValue == "" || *returnValue == "0x") { - returnValue = nil - } - - result := &TraceTransaction{ - Gas: rsp.Gas, - Failed: rsp.Failed, - ReturnValue: returnValue, - Structlogs: []StructLog{}, - } - - // Empty array on transfer - for _, log := range rsp.StructLogs { - var returnData *string - - if log.ReturnData != nil { - returnData = new(string) - *returnData = hex.EncodeToString(log.ReturnData) - } - - result.Structlogs = append(result.Structlogs, StructLog{ - PC: log.PC, - Op: log.Op, - Gas: log.Gas, - GasCost: log.GasCost, - Depth: log.Depth, - ReturnData: returnData, - Refund: log.Refund, - Error: log.Error, - Stack: log.Stack, - }) - } - - return result, nil -} - -// BlockReceipts fetches all receipts for a block by number (much faster than per-tx). -func (n *Node) BlockReceipts(ctx context.Context, blockNumber *big.Int) ([]*types.Receipt, error) { - start := time.Now() - - blockNrOrHash := rpc.BlockNumberOrHashWithNumber(rpc.BlockNumber(blockNumber.Int64())) - - receipts, err := n.client.BlockReceipts(ctx, blockNrOrHash) - - duration := time.Since(start) - - // Record RPC metrics - status := STATUS_SUCCESS - if err != nil { - status = STATUS_ERROR - } - - network := n.Metadata().ChainID() - - pcommon.RPCCallDuration.WithLabelValues( - fmt.Sprintf("%d", network), - n.config.Name, - "eth_getBlockReceipts", - status, - ).Observe(duration.Seconds()) - - pcommon.RPCCallsTotal.WithLabelValues( - fmt.Sprintf("%d", network), - n.config.Name, - "eth_getBlockReceipts", - status, - ).Inc() - - if err != nil { - return nil, err - } - - return receipts, nil -} - -// TransactionReceipt fetches the receipt for a transaction by hash. -func (n *Node) TransactionReceipt(ctx context.Context, hash string) (*types.Receipt, error) { - start := time.Now() - - txHash := common.HexToHash(hash) - - receipt, err := n.client.TransactionReceipt(ctx, txHash) - - duration := time.Since(start) - - // Record RPC metrics - status := STATUS_SUCCESS - if err != nil { - status = STATUS_ERROR - } - - network := n.Metadata().ChainID() - - pcommon.RPCCallDuration.WithLabelValues( - fmt.Sprintf("%d", network), - n.config.Name, - "eth_getTransactionReceipt", - status, - ).Observe(duration.Seconds()) - - pcommon.RPCCallsTotal.WithLabelValues( - fmt.Sprintf("%d", network), - n.config.Name, - "eth_getTransactionReceipt", - status, - ).Inc() - - if err != nil { - return nil, err - } - - return receipt, nil -} - -// DebugTraceTransaction traces a transaction execution using the client's debug API. 
-func (n *Node) DebugTraceTransaction(ctx context.Context, hash string, blockNumber *big.Int, options TraceOptions) (*TraceTransaction, error) { - // Add a timeout if the context doesn't already have one - if _, hasDeadline := ctx.Deadline(); !hasDeadline { - var cancel context.CancelFunc - - ctx, cancel = context.WithTimeout(ctx, 60*time.Second) - - defer cancel() - } - - client := n.Metadata().Client(ctx) - - switch client { - case "geth": - return nil, fmt.Errorf("geth is not supported") - case "nethermind": - return nil, fmt.Errorf("nethermind is not supported") - case "besu": - return nil, fmt.Errorf("besu is not supported") - case "reth": - return nil, fmt.Errorf("reth is not supported") - case "erigon": - return n.traceTransactionErigon(ctx, hash, options) - default: - // Default to Erigon format if client is unknown - return n.traceTransactionErigon(ctx, hash, options) - } -} diff --git a/pkg/ethereum/execution/sanitize.go b/pkg/ethereum/execution/sanitize.go new file mode 100644 index 0000000..6215693 --- /dev/null +++ b/pkg/ethereum/execution/sanitize.go @@ -0,0 +1,27 @@ +package execution + +// SanitizeGasCost detects and corrects corrupted gasCost values from Erigon's +// debug_traceTransaction RPC. +// +// Bug: Erigon has an unsigned integer underflow bug in gas.go:callGas() where +// `availableGas - base` underflows when availableGas < base, producing huge +// corrupted values (e.g., 18158513697557845033). +// +// Detection: gasCost can never legitimately exceed the available gas at that +// opcode. If gasCost > Gas, the value is corrupted. +// +// Correction: Set gasCost = Gas (all available gas consumed), matching Reth's +// behavior for failed CALL opcodes. +func SanitizeGasCost(log *StructLog) { + if log.GasCost > log.Gas { + log.GasCost = log.Gas + } +} + +// SanitizeStructLogs applies gas cost sanitization to all structlogs. +// This corrects corrupted values from Erigon's unsigned integer underflow bug. 
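A toy illustration of the underflow described above (this is not Erigon's actual gas.go code): subtracting a larger `base` from a smaller `availableGas` wraps around in uint64 arithmetic, and the sanitizer clamps the result back to the available gas.

```go
package main

import "fmt"

func main() {
	// Toy values only - the point is the wraparound, not Erigon's exact numbers.
	var availableGas, base uint64 = 5058, 10000

	corrupted := availableGas - base
	fmt.Println(corrupted) // 18446744073709546674: far larger than any real gas cost

	// SanitizeGasCost applies exactly this clamp: gasCost can never exceed
	// the gas available at that opcode.
	if corrupted > availableGas {
		corrupted = availableGas
	}

	fmt.Println(corrupted) // 5058
}
```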
+func SanitizeStructLogs(logs []StructLog) { + for i := range logs { + SanitizeGasCost(&logs[i]) + } +} diff --git a/pkg/ethereum/execution/sanitize_test.go b/pkg/ethereum/execution/sanitize_test.go new file mode 100644 index 0000000..84d3bed --- /dev/null +++ b/pkg/ethereum/execution/sanitize_test.go @@ -0,0 +1,74 @@ +package execution + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestSanitizeGasCost(t *testing.T) { + tests := []struct { + name string + gas uint64 + gasCost uint64 + expectedGasCost uint64 + }{ + { + name: "normal gasCost unchanged", + gas: 10000, + gasCost: 3, + expectedGasCost: 3, + }, + { + name: "gasCost equals gas unchanged", + gas: 5058, + gasCost: 5058, + expectedGasCost: 5058, + }, + { + name: "corrupted gasCost from Erigon underflow bug", + gas: 5058, + gasCost: 18158513697557845033, // 0xfc00000000001429 + expectedGasCost: 5058, + }, + { + name: "another corrupted value", + gas: 9974, + gasCost: 18158513697557850263, // From test case + expectedGasCost: 9974, + }, + { + name: "max uint64 corrupted", + gas: 1000, + gasCost: ^uint64(0), // max uint64 + expectedGasCost: 1000, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + log := &StructLog{ + Gas: tt.gas, + GasCost: tt.gasCost, + } + + SanitizeGasCost(log) + + assert.Equal(t, tt.expectedGasCost, log.GasCost) + }) + } +} + +func TestSanitizeStructLogs(t *testing.T) { + logs := []StructLog{ + {Op: "PUSH1", Gas: 10000, GasCost: 3}, + {Op: "CALL", Gas: 5058, GasCost: 18158513697557845033}, // Corrupted + {Op: "STOP", Gas: 100, GasCost: 0}, + } + + SanitizeStructLogs(logs) + + assert.Equal(t, uint64(3), logs[0].GasCost, "normal gasCost should be unchanged") + assert.Equal(t, uint64(5058), logs[1].GasCost, "corrupted gasCost should be sanitized") + assert.Equal(t, uint64(0), logs[2].GasCost, "zero gasCost should be unchanged") +} diff --git a/pkg/ethereum/execution/structlog.go b/pkg/ethereum/execution/structlog.go index 4deeb2a..5b0c856 100644 --- a/pkg/ethereum/execution/structlog.go +++ b/pkg/ethereum/execution/structlog.go @@ -8,14 +8,51 @@ type TraceTransaction struct { Structlogs []StructLog } +// StructLog represents a single EVM opcode execution trace entry. +// +// This struct supports two operation modes: +// - RPC mode: Stack is populated for CALL opcodes, CallToAddress/GasUsed computed post-hoc +// - Embedded mode: CallToAddress/GasUsed pre-computed by tracer, Stack remains nil +// +// The embedded mode optimizations eliminate ~99% of stack-related allocations +// and remove the post-processing GasUsed computation pass. type StructLog struct { - PC uint32 `json:"pc"` - Op string `json:"op"` - Gas uint64 `json:"gas"` - GasCost uint64 `json:"gasCost"` - Depth uint64 `json:"depth"` - ReturnData *string `json:"returnData"` - Refund *uint64 `json:"refund,omitempty"` - Error *string `json:"error,omitempty"` - Stack *[]string `json:"stack,omitempty"` + // PC is the program counter. Kept for RPC backward compatibility but not + // populated in embedded mode (always 0). + PC uint32 `json:"pc"` + + // Op is the opcode name (e.g., "PUSH1", "CALL", "SSTORE"). + Op string `json:"op"` + + // Gas is the remaining gas before this opcode executes. + Gas uint64 `json:"gas"` + + // GasCost is the static gas cost of the opcode (may differ from actual GasUsed). + GasCost uint64 `json:"gasCost"` + + // GasUsed is the actual gas consumed by this opcode. + // In embedded mode: pre-computed by tracer using gas difference to next opcode. 
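As a sketch of the "gas difference to the next opcode" rule mentioned above (a simplified illustration, not the tracer's or `ComputeGasUsed`'s actual implementation; it ignores call-depth boundaries):

```go
package consumer

import "github.com/ethpandaops/execution-processor/pkg/ethereum/execution"

// fillGasUsed is a hypothetical post-processing pass: the gas consumed by a
// step is the remaining gas before it minus the remaining gas before the next
// step. Real implementations also have to handle CALL/CREATE depth changes,
// which this sketch deliberately skips.
func fillGasUsed(logs []execution.StructLog) {
	for i := 0; i+1 < len(logs); i++ {
		if logs[i].Gas >= logs[i+1].Gas {
			logs[i].GasUsed = logs[i].Gas - logs[i+1].Gas
		}
	}
}
```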
+ // In RPC mode: computed post-hoc by ComputeGasUsed(), this field will be 0. + GasUsed uint64 `json:"gasUsed,omitempty"` + + // Depth is the call stack depth (1 = top-level, increases with CALL/CREATE). + Depth uint64 `json:"depth"` + + // ReturnData contains the return data from the last CALL/STATICCALL/etc. + ReturnData *string `json:"returnData"` + + // Refund is the gas refund counter value. + Refund *uint64 `json:"refund,omitempty"` + + // Error contains any error message if the opcode failed. + Error *string `json:"error,omitempty"` + + // Stack contains the EVM stack state (RPC mode only). + // In embedded mode this is nil - use CallToAddress instead. + Stack *[]string `json:"stack,omitempty"` + + // CallToAddress is the target address for CALL/STATICCALL/DELEGATECALL/CALLCODE. + // In embedded mode: pre-extracted by tracer from stack[len-2]. + // In RPC mode: nil, extracted post-hoc from Stack by extractCallAddress(). + CallToAddress *string `json:"callToAddress,omitempty"` } diff --git a/pkg/ethereum/pool.go b/pkg/ethereum/pool.go index 2d122e0..d04297c 100644 --- a/pkg/ethereum/pool.go +++ b/pkg/ethereum/pool.go @@ -14,35 +14,56 @@ import ( type Pool struct { log logrus.FieldLogger - executionNodes []*execution.Node + executionNodes []execution.Node metrics *Metrics config *Config mu sync.RWMutex - healthyExecutionNodes map[*execution.Node]bool + healthyExecutionNodes map[execution.Node]bool // Goroutine management wg sync.WaitGroup cancel context.CancelFunc } -func NewPool(log logrus.FieldLogger, namespace string, config *Config) *Pool { +// NewPoolWithNodes creates a pool with pre-created Node implementations. +// Use this when embedding execution-processor as a library where the host +// provides custom Node implementations (e.g., EmbeddedNode with DataSource). 
+// +// Parameters: +// - log: Logger for pool operations +// - namespace: Metrics namespace prefix (will have "_ethereum" appended) +// - nodes: Pre-created Node implementations +// - config: Optional configuration (nil creates empty config with defaults) +// +// Example: +// +// // Create embedded node with custom data source +// dataSource := &MyDataSource{client: myClient} +// node := execution.NewEmbeddedNode(log, "my-node", dataSource) +// +// // Create pool with the embedded node +// pool := ethereum.NewPoolWithNodes(log, "processor", []execution.Node{node}, nil) +// pool.Start(ctx) +// +// // Mark ready when data source is ready +// node.MarkReady(ctx) +func NewPoolWithNodes(log logrus.FieldLogger, namespace string, nodes []execution.Node, config *Config) *Pool { namespace = fmt.Sprintf("%s_ethereum", namespace) - p := &Pool{ + + // If config is nil, create an empty config + if config == nil { + config = &Config{} + } + + return &Pool{ log: log, - executionNodes: make([]*execution.Node, 0), - healthyExecutionNodes: make(map[*execution.Node]bool), + executionNodes: nodes, + healthyExecutionNodes: make(map[execution.Node]bool, len(nodes)), metrics: GetMetricsInstance(namespace), config: config, } - - for _, execCfg := range config.Execution { - node := execution.NewNode(log, execCfg) - p.executionNodes = append(p.executionNodes, node) - } - - return p } func (p *Pool) HasExecutionNodes() bool { @@ -62,11 +83,11 @@ func (p *Pool) HasHealthyExecutionNodes() bool { return false } -func (p *Pool) GetHealthyExecutionNodes() []*execution.Node { +func (p *Pool) GetHealthyExecutionNodes() []execution.Node { p.mu.RLock() defer p.mu.RUnlock() - var healthyNodes []*execution.Node + healthyNodes := make([]execution.Node, 0, len(p.healthyExecutionNodes)) for node, healthy := range p.healthyExecutionNodes { if healthy { @@ -77,11 +98,11 @@ func (p *Pool) GetHealthyExecutionNodes() []*execution.Node { return healthyNodes } -func (p *Pool) GetHealthyExecutionNode() *execution.Node { +func (p *Pool) GetHealthyExecutionNode() execution.Node { p.mu.RLock() defer p.mu.RUnlock() - var healthyNodes []*execution.Node + healthyNodes := make([]execution.Node, 0, len(p.healthyExecutionNodes)) for node, healthy := range p.healthyExecutionNodes { if healthy { @@ -97,7 +118,7 @@ func (p *Pool) GetHealthyExecutionNode() *execution.Node { return healthyNodes[rand.IntN(len(healthyNodes))] } -func (p *Pool) WaitForHealthyExecutionNode(ctx context.Context) (*execution.Node, error) { +func (p *Pool) WaitForHealthyExecutionNode(ctx context.Context) (execution.Node, error) { // Check if we have any execution nodes configured if len(p.executionNodes) == 0 { return nil, fmt.Errorf("no execution nodes configured") @@ -193,15 +214,24 @@ func (p *Pool) Start(ctx context.Context) { p.UpdateNodeMetrics() for _, node := range p.executionNodes { - g.Go(func() error { - node.OnReady(ctx, func(innerCtx context.Context) error { - p.mu.Lock() - p.healthyExecutionNodes[node] = true - p.mu.Unlock() - - return nil - }) + // Register OnReady callbacks synchronously BEFORE spawning goroutines. + // This ensures callbacks are registered before Start() returns, so any + // subsequent call to MarkReady() (for EmbeddedNode) will find the callback. + // Previously, registration happened inside g.Go() which created a race + // condition where MarkReady() could execute before callbacks were registered. 
+ p.log.WithField("node", node.Name()).Info("Registering OnReady callback for node") + node.OnReady(ctx, func(innerCtx context.Context) error { + p.log.WithField("node", node.Name()).Info("OnReady callback executed, marking node healthy") + p.mu.Lock() + p.healthyExecutionNodes[node] = true + p.mu.Unlock() + + return nil + }) + // Start node asynchronously - the actual initialization can be slow + // (e.g., RPC connection establishment), but callback registration is instant. + g.Go(func() error { return node.Start(ctx) }) } diff --git a/pkg/ethereum/pool_rpc.go b/pkg/ethereum/pool_rpc.go new file mode 100644 index 0000000..38ef9e8 --- /dev/null +++ b/pkg/ethereum/pool_rpc.go @@ -0,0 +1,32 @@ +//go:build !embedded + +package ethereum + +import ( + "fmt" + + "github.com/ethpandaops/execution-processor/pkg/ethereum/execution" + "github.com/ethpandaops/execution-processor/pkg/ethereum/execution/geth" + "github.com/sirupsen/logrus" +) + +// NewPool creates a new pool from config, using RPC nodes. +// This function imports go-ethereum types through the geth package. +// For embedded mode (no go-ethereum dependency), use NewPoolWithNodes instead. +func NewPool(log logrus.FieldLogger, namespace string, config *Config) *Pool { + namespace = fmt.Sprintf("%s_ethereum", namespace) + p := &Pool{ + log: log, + executionNodes: make([]execution.Node, 0, len(config.Execution)), + healthyExecutionNodes: make(map[execution.Node]bool, len(config.Execution)), + metrics: GetMetricsInstance(namespace), + config: config, + } + + for _, execCfg := range config.Execution { + node := geth.NewRPCNode(log, execCfg) + p.executionNodes = append(p.executionNodes, node) + } + + return p +} diff --git a/pkg/ethereum/pool_test.go b/pkg/ethereum/pool_test.go index 05c4375..66a72f6 100644 --- a/pkg/ethereum/pool_test.go +++ b/pkg/ethereum/pool_test.go @@ -2,6 +2,7 @@ package ethereum_test import ( "context" + "math/big" "sync" "testing" "time" @@ -13,6 +14,135 @@ import ( "github.com/stretchr/testify/require" ) +// MockBlock implements execution.Block for testing. +type MockBlock struct { + number *big.Int +} + +func (b *MockBlock) Number() *big.Int { return b.number } +func (b *MockBlock) Hash() execution.Hash { return execution.Hash{} } +func (b *MockBlock) ParentHash() execution.Hash { return execution.Hash{} } +func (b *MockBlock) BaseFee() *big.Int { return nil } +func (b *MockBlock) Transactions() []execution.Transaction { return nil } + +// MockReceipt implements execution.Receipt for testing. +type MockReceipt struct{} + +func (r *MockReceipt) Status() uint64 { return 1 } +func (r *MockReceipt) TxHash() execution.Hash { return execution.Hash{} } +func (r *MockReceipt) GasUsed() uint64 { return 21000 } + +// MockNode implements execution.Node for testing. 
+type MockNode struct { + name string + started bool + stopped bool + onReadyCallbacks []func(ctx context.Context) error + mu sync.Mutex +} + +func NewMockNode(name string) *MockNode { + return &MockNode{ + name: name, + onReadyCallbacks: make([]func(ctx context.Context) error, 0), + } +} + +func (m *MockNode) Start(_ context.Context) error { + m.mu.Lock() + defer m.mu.Unlock() + + m.started = true + + return nil +} + +func (m *MockNode) Stop(_ context.Context) error { + m.mu.Lock() + defer m.mu.Unlock() + + m.stopped = true + + return nil +} + +func (m *MockNode) OnReady(_ context.Context, callback func(ctx context.Context) error) { + m.mu.Lock() + defer m.mu.Unlock() + + m.onReadyCallbacks = append(m.onReadyCallbacks, callback) +} + +// TriggerReady simulates the node becoming ready by calling all OnReady callbacks. +func (m *MockNode) TriggerReady(ctx context.Context) error { + m.mu.Lock() + callbacks := m.onReadyCallbacks + m.mu.Unlock() + + for _, cb := range callbacks { + if err := cb(ctx); err != nil { + return err + } + } + + return nil +} + +func (m *MockNode) BlockNumber(_ context.Context) (*uint64, error) { + num := uint64(12345) + + return &num, nil +} + +func (m *MockNode) BlockByNumber(_ context.Context, number *big.Int) (execution.Block, error) { + return &MockBlock{number: number}, nil +} + +func (m *MockNode) BlocksByNumbers(_ context.Context, numbers []*big.Int) ([]execution.Block, error) { + blocks := make([]execution.Block, len(numbers)) + for i, num := range numbers { + blocks[i] = &MockBlock{number: num} + } + + return blocks, nil +} + +func (m *MockNode) BlockReceipts(_ context.Context, _ *big.Int) ([]execution.Receipt, error) { + return []execution.Receipt{}, nil +} + +func (m *MockNode) TransactionReceipt(_ context.Context, _ string) (execution.Receipt, error) { + return &MockReceipt{}, nil +} + +func (m *MockNode) DebugTraceTransaction( + _ context.Context, + _ string, + _ *big.Int, + _ execution.TraceOptions, +) (*execution.TraceTransaction, error) { + return &execution.TraceTransaction{}, nil +} + +func (m *MockNode) ChainID() int64 { + return 1 +} + +func (m *MockNode) ClientType() string { + return "mock" +} + +func (m *MockNode) IsSynced() bool { + return true +} + +func (m *MockNode) Name() string { + return m.name +} + +// Compile-time check that MockNode implements execution.Node. 
+var _ execution.Node = (*MockNode)(nil) + func TestPool_Creation(t *testing.T) { log := logrus.New() log.SetLevel(logrus.ErrorLevel) @@ -448,3 +578,249 @@ func TestPool_NodeSelection(t *testing.T) { func stringPtr(s string) *string { return &s } + +// Tests for NewPoolWithNodes + +func TestPool_NewPoolWithNodes_Basic(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + node1 := NewMockNode("mock-node-1") + node2 := NewMockNode("mock-node-2") + + nodes := []execution.Node{node1, node2} + pool := ethereum.NewPoolWithNodes(log, "test", nodes, nil) + + require.NotNil(t, pool) + assert.True(t, pool.HasExecutionNodes()) + assert.False(t, pool.HasHealthyExecutionNodes()) +} + +func TestPool_NewPoolWithNodes_NilConfig(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + node := NewMockNode("mock-node") + + nodes := []execution.Node{node} + pool := ethereum.NewPoolWithNodes(log, "test", nodes, nil) + + require.NotNil(t, pool) + assert.True(t, pool.HasExecutionNodes()) + + // Verify default config behavior - unknown chain ID should error + network, err := pool.GetNetworkByChainID(999999) + assert.Error(t, err) + assert.Nil(t, network) +} + +func TestPool_NewPoolWithNodes_EmptyNodes(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + nodes := []execution.Node{} + pool := ethereum.NewPoolWithNodes(log, "test", nodes, nil) + + require.NotNil(t, pool) + assert.False(t, pool.HasExecutionNodes()) + assert.False(t, pool.HasHealthyExecutionNodes()) + assert.Nil(t, pool.GetHealthyExecutionNode()) + assert.Empty(t, pool.GetHealthyExecutionNodes()) +} + +func TestPool_NewPoolWithNodes_MultipleNodes(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + const numNodes = 5 + + nodes := make([]execution.Node, numNodes) + for i := 0; i < numNodes; i++ { + nodes[i] = NewMockNode("mock-node-" + string(rune('a'+i))) + } + + pool := ethereum.NewPoolWithNodes(log, "test", nodes, nil) + + require.NotNil(t, pool) + assert.True(t, pool.HasExecutionNodes()) +} + +func TestPool_NewPoolWithNodes_WithConfig(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + node := NewMockNode("mock-node") + overrideName := "custom-network" + + config := ðereum.Config{ + OverrideNetworkName: &overrideName, + } + + nodes := []execution.Node{node} + pool := ethereum.NewPoolWithNodes(log, "test", nodes, config) + + require.NotNil(t, pool) + + // Verify config is used - override name should be returned + network, err := pool.GetNetworkByChainID(999999) + require.NoError(t, err) + assert.Equal(t, "custom-network", network.Name) +} + +func TestPool_NewPoolWithNodes_StartStop(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + node := NewMockNode("mock-node") + + nodes := []execution.Node{node} + pool := ethereum.NewPoolWithNodes(log, "test", nodes, nil) + + ctx := context.Background() + pool.Start(ctx) + + // Wait for async Start goroutines to execute + time.Sleep(50 * time.Millisecond) + + // Node should have been started + node.mu.Lock() + assert.True(t, node.started) + node.mu.Unlock() + + stopCtx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + err := pool.Stop(stopCtx) + assert.NoError(t, err) + + // Node should have been stopped + node.mu.Lock() + assert.True(t, node.stopped) + node.mu.Unlock() +} + +func TestPool_NewPoolWithNodes_NodeBecomesHealthy(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + node := NewMockNode("mock-node") + + 
nodes := []execution.Node{node} + pool := ethereum.NewPoolWithNodes(log, "test", nodes, nil) + + ctx := context.Background() + pool.Start(ctx) + + defer func() { + stopCtx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + err := pool.Stop(stopCtx) + assert.NoError(t, err) + }() + + // Wait for async Start goroutines to register callbacks + time.Sleep(50 * time.Millisecond) + + // Initially no healthy nodes + assert.False(t, pool.HasHealthyExecutionNodes()) + + // Trigger the node to become ready (simulates OnReady callback) + err := node.TriggerReady(ctx) + require.NoError(t, err) + + // Now the pool should have a healthy node + assert.True(t, pool.HasHealthyExecutionNodes()) + assert.NotNil(t, pool.GetHealthyExecutionNode()) +} + +func TestPool_EmbeddedNodeIntegration(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + // Create an EmbeddedNode with a mock data source + ds := &testDataSource{} + embeddedNode := execution.NewEmbeddedNode(log, "embedded-test", ds) + + nodes := []execution.Node{embeddedNode} + pool := ethereum.NewPoolWithNodes(log, "test", nodes, nil) + + ctx := context.Background() + pool.Start(ctx) + + defer func() { + stopCtx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + err := pool.Stop(stopCtx) + assert.NoError(t, err) + }() + + // Wait for async Start goroutines to register callbacks + time.Sleep(50 * time.Millisecond) + + // Initially no healthy nodes + assert.False(t, pool.HasHealthyExecutionNodes()) + + // Mark embedded node as ready + err := embeddedNode.MarkReady(ctx) + require.NoError(t, err) + + // Now the pool should have a healthy node + assert.True(t, pool.HasHealthyExecutionNodes()) + + healthyNode := pool.GetHealthyExecutionNode() + require.NotNil(t, healthyNode) + assert.Equal(t, "embedded-test", healthyNode.Name()) +} + +// testDataSource is a minimal DataSource implementation for integration tests. +type testDataSource struct{} + +func (ds *testDataSource) BlockNumber(_ context.Context) (*uint64, error) { + num := uint64(12345) + + return &num, nil +} + +func (ds *testDataSource) BlockByNumber(_ context.Context, number *big.Int) (execution.Block, error) { + return &MockBlock{number: number}, nil +} + +func (ds *testDataSource) BlockReceipts(_ context.Context, _ *big.Int) ([]execution.Receipt, error) { + return []execution.Receipt{}, nil +} + +func (ds *testDataSource) TransactionReceipt(_ context.Context, _ string) (execution.Receipt, error) { + return &MockReceipt{}, nil +} + +func (ds *testDataSource) DebugTraceTransaction( + _ context.Context, + _ string, + _ *big.Int, + _ execution.TraceOptions, +) (*execution.TraceTransaction, error) { + return &execution.TraceTransaction{}, nil +} + +func (ds *testDataSource) ChainID() int64 { + return 1 +} + +func (ds *testDataSource) ClientType() string { + return "test" +} + +func (ds *testDataSource) IsSynced() bool { + return true +} + +func (ds *testDataSource) BlocksByNumbers(_ context.Context, numbers []*big.Int) ([]execution.Block, error) { + blocks := make([]execution.Block, len(numbers)) + for i, num := range numbers { + blocks[i] = &MockBlock{number: num} + } + + return blocks, nil +} diff --git a/pkg/leaderelection/interface.go b/pkg/leaderelection/interface.go index 9fade5b..ef0cd11 100644 --- a/pkg/leaderelection/interface.go +++ b/pkg/leaderelection/interface.go @@ -5,6 +5,12 @@ import ( "time" ) +// LeadershipCallback is a function invoked when leadership status changes. 
+// The callback is invoked synchronously - implementations should return quickly (< 100ms) +// to avoid delaying leadership renewal. Long-running operations should be spawned +// in a separate goroutine. +type LeadershipCallback func(ctx context.Context, isLeader bool) + // Elector defines the interface for leader election implementations. type Elector interface { // Start begins the leader election process @@ -16,12 +22,13 @@ type Elector interface { // IsLeader returns true if this node is currently the leader IsLeader() bool - // LeadershipChannel returns a channel that receives leadership changes - // true = gained leadership, false = lost leadership - LeadershipChannel() <-chan bool - - // GetLeaderID returns the current leader's ID - GetLeaderID() (string, error) + // OnLeadershipChange registers a callback for guaranteed leadership notification. + // The callback is invoked synchronously when leadership status changes. + // Multiple callbacks can be registered and will be invoked in registration order. + // + // Important: Keep callbacks fast (< 100ms) to avoid delaying leadership renewal. + // For long-running operations, spawn a goroutine within the callback. + OnLeadershipChange(callback LeadershipCallback) } // Config holds configuration for leader election. @@ -32,7 +39,7 @@ type Config struct { // RenewalInterval is how often to renew the leader lock RenewalInterval time.Duration - // NodeID is the unique identifier for this node + // NodeID is the unique identifier for this node (for logging/metrics only) // If empty, a random ID will be generated NodeID string } diff --git a/pkg/leaderelection/redis_election.go b/pkg/leaderelection/redis_election.go index 1edcfb8..9bbaf83 100644 --- a/pkg/leaderelection/redis_election.go +++ b/pkg/leaderelection/redis_election.go @@ -10,40 +10,48 @@ import ( "time" "github.com/ethpandaops/execution-processor/pkg/common" + "github.com/go-redsync/redsync/v4" + "github.com/go-redsync/redsync/v4/redis/goredis/v9" "github.com/redis/go-redis/v9" "github.com/sirupsen/logrus" ) -// RedisElector implements leader election using Redis. +// RedisElector implements leader election using Redis with redsync. type RedisElector struct { - client *redis.Client + rs *redsync.Redsync + mutex *redsync.Mutex log logrus.FieldLogger config *Config nodeID string keyName string - network string // Network name for metrics + network string mu sync.RWMutex isLeader bool leadershipStartTime time.Time stopped bool - leadershipChan chan bool - stopChan chan struct{} - wg sync.WaitGroup + callbacksMu sync.RWMutex + callbacks []LeadershipCallback + + stopChan chan struct{} + wg sync.WaitGroup } -// NewRedisElector creates a new Redis-based leader elector. -func NewRedisElector(client *redis.Client, log logrus.FieldLogger, keyName string, config *Config) (*RedisElector, error) { +// NewRedisElector creates a new Redis-based leader elector using redsync. 
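A hedged example of the recommended callback pattern above: the callback itself returns immediately, and anything long-running is pushed onto a separate goroutine so leadership renewal is not delayed. `registerLeadershipHandler`, `startWork`, and `stopWork` are hypothetical names.

```go
package consumer

import (
	"context"

	"github.com/sirupsen/logrus"

	"github.com/ethpandaops/execution-processor/pkg/leaderelection"
)

// registerLeadershipHandler wires a fast callback that delegates long-running
// work to a goroutine, keeping the callback well under the suggested 100ms.
func registerLeadershipHandler(elector leaderelection.Elector, log logrus.FieldLogger, startWork func(context.Context), stopWork func()) {
	elector.OnLeadershipChange(func(ctx context.Context, isLeader bool) {
		if isLeader {
			log.Info("gained leadership")

			go startWork(ctx) // long-running work stays off the callback path
		} else {
			log.Info("lost leadership")

			stopWork()
		}
	})
}
```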
+func NewRedisElector( + client *redis.Client, + log logrus.FieldLogger, + keyName string, + config *Config, +) (*RedisElector, error) { if config == nil { config = DefaultConfig() } - // Generate random node ID if not provided nodeID := config.NodeID if nodeID == "" { bytes := make([]byte, 16) - if _, err := rand.Read(bytes); err != nil { return nil, fmt.Errorf("failed to generate node ID: %w", err) } @@ -59,15 +67,16 @@ func NewRedisElector(client *redis.Client, log logrus.FieldLogger, keyName strin network = parts[2] } + pool := goredis.NewPool(client) + return &RedisElector{ - client: client, - log: log.WithField("component", "leader-election").WithField("node_id", nodeID), - config: config, - nodeID: nodeID, - keyName: keyName, - network: network, - leadershipChan: make(chan bool, 10), - stopChan: make(chan struct{}), + rs: redsync.New(pool), + log: log.WithField("component", "leader-election").WithField("node_id", nodeID), + config: config, + nodeID: nodeID, + keyName: keyName, + network: network, + stopChan: make(chan struct{}), }, nil } @@ -75,7 +84,6 @@ func NewRedisElector(client *redis.Client, log logrus.FieldLogger, keyName strin func (e *RedisElector) Start(ctx context.Context) error { e.log.Info("Starting leader election") - // Initialize metrics common.LeaderElectionStatus.WithLabelValues(e.network, e.nodeID).Set(0) e.wg.Add(1) @@ -103,24 +111,20 @@ func (e *RedisElector) Stop(ctx context.Context) error { close(e.stopChan) e.wg.Wait() - // Release leadership if we have it if e.IsLeader() { - // Record final leadership duration e.mu.RLock() - leadershipDuration := time.Since(e.leadershipStartTime).Seconds() + duration := time.Since(e.leadershipStartTime).Seconds() e.mu.RUnlock() - common.LeaderElectionDuration.WithLabelValues(e.network, e.nodeID).Observe(leadershipDuration) + common.LeaderElectionDuration.WithLabelValues(e.network, e.nodeID).Observe(duration) common.LeaderElectionStatus.WithLabelValues(e.network, e.nodeID).Set(0) - if err := e.releaseLeadership(ctx); err != nil { + if _, err := e.mutex.UnlockContext(ctx); err != nil { e.log.WithError(err).Error("Failed to release leadership on stop") common.LeaderElectionErrors.WithLabelValues(e.network, e.nodeID, "release").Inc() } } - close(e.leadershipChan) - return nil } @@ -132,26 +136,12 @@ func (e *RedisElector) IsLeader() bool { return e.isLeader } -// LeadershipChannel returns a channel that receives leadership changes. -func (e *RedisElector) LeadershipChannel() <-chan bool { - return e.leadershipChan -} - -// GetLeaderID returns the current leader's ID. -func (e *RedisElector) GetLeaderID() (string, error) { - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() +// OnLeadershipChange registers a callback for guaranteed leadership notification. +func (e *RedisElector) OnLeadershipChange(callback LeadershipCallback) { + e.callbacksMu.Lock() + defer e.callbacksMu.Unlock() - val, err := e.client.Get(ctx, e.keyName).Result() - if err == redis.Nil { - return "", fmt.Errorf("no leader elected") - } - - if err != nil { - return "", fmt.Errorf("failed to get leader ID: %w", err) - } - - return val, nil + e.callbacks = append(e.callbacks, callback) } // run is the main election loop. 
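End-to-end wiring for the constructor above, as a hedged sketch: the Redis address, key name, and timings are placeholders chosen for illustration, not values required by the processor.

```go
package consumer

import (
	"context"
	"time"

	"github.com/redis/go-redis/v9"
	"github.com/sirupsen/logrus"

	"github.com/ethpandaops/execution-processor/pkg/leaderelection"
)

// startElection is a hypothetical wiring example for the redsync-based elector.
func startElection(ctx context.Context, log logrus.FieldLogger) (leaderelection.Elector, error) {
	client := redis.NewClient(&redis.Options{Addr: "localhost:6379"})

	cfg := &leaderelection.Config{
		TTL:             15 * time.Second, // lock expiry
		RenewalInterval: 5 * time.Second,  // kept comfortably below the TTL
		// NodeID left empty: a random ID is generated.
	}

	elector, err := leaderelection.NewRedisElector(client, log, "example:leader:mainnet", cfg)
	if err != nil {
		return nil, err
	}

	elector.OnLeadershipChange(func(_ context.Context, isLeader bool) {
		log.WithField("leader", isLeader).Info("leadership changed")
	})

	if err := elector.Start(ctx); err != nil {
		return nil, err
	}

	return elector, nil
}
```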
@@ -161,9 +151,8 @@ func (e *RedisElector) run(ctx context.Context) { ticker := time.NewTicker(e.config.RenewalInterval) defer ticker.Stop() - // Try to acquire leadership immediately - if e.tryAcquireLeadership(ctx) { - e.handleLeadershipGain() + if e.tryAcquire(ctx) { + e.handleGain(ctx) } for { @@ -174,165 +163,93 @@ func (e *RedisElector) run(ctx context.Context) { return case <-ticker.C: if e.IsLeader() { - // Try to renew leadership - if !e.renewLeadership(ctx) { - e.handleLeadershipLoss() + if !e.tryExtend(ctx) { + e.handleLoss(ctx) } } else { - // Try to acquire leadership - if e.tryAcquireLeadership(ctx) { - e.handleLeadershipGain() + if e.tryAcquire(ctx) { + e.handleGain(ctx) } } } } } -// tryAcquireLeadership attempts to become the leader. -func (e *RedisElector) tryAcquireLeadership(ctx context.Context) bool { - e.log.WithFields(logrus.Fields{ - "key": e.keyName, - "ttl": e.config.TTL, - }).Debug("Attempting to acquire leadership") - - // Use SET with NX (only set if not exists) and PX (expire in milliseconds) - ok, err := e.client.SetNX(ctx, e.keyName, e.nodeID, e.config.TTL).Result() - if err != nil { - e.log.WithError(err).Error("Failed to acquire leadership") +// tryAcquire attempts to acquire the leadership lock. +func (e *RedisElector) tryAcquire(ctx context.Context) bool { + e.mutex = e.rs.NewMutex( + e.keyName, + redsync.WithExpiry(e.config.TTL), + redsync.WithTries(1), + redsync.WithDriftFactor(0.01), + redsync.WithSetNXOnExtend(), + ) - common.LeaderElectionErrors.WithLabelValues(e.network, e.nodeID, "acquire").Inc() + if err := e.mutex.TryLockContext(ctx); err != nil { + e.log.WithError(err).Info("Failed to acquire leadership") return false } - if ok { - e.mu.Lock() - e.isLeader = true - e.leadershipStartTime = time.Now() - e.mu.Unlock() - - common.LeaderElectionStatus.WithLabelValues(e.network, e.nodeID).Set(1) - common.LeaderElectionTransitions.WithLabelValues(e.network, e.nodeID, "gained").Inc() - - e.log.Info("Acquired leadership") - - return true - } + e.mu.Lock() + e.isLeader = true + e.leadershipStartTime = time.Now() + e.mu.Unlock() - // Check who is the current leader - currentLeader, _ := e.client.Get(ctx, e.keyName).Result() - e.log.WithFields(logrus.Fields{ - "current_leader": currentLeader, - "our_node_id": e.nodeID, - }).Debug("Failed to acquire leadership, another node is leader") + common.LeaderElectionStatus.WithLabelValues(e.network, e.nodeID).Set(1) + common.LeaderElectionTransitions.WithLabelValues(e.network, e.nodeID, "gained").Inc() + e.log.Info("Acquired leadership") - return false + return true } -// renewLeadership attempts to extend the leadership lock. -func (e *RedisElector) renewLeadership(ctx context.Context) bool { - // Lua script to atomically check ownership and extend TTL - script := ` - if redis.call("get", KEYS[1]) == ARGV[1] then - return redis.call("pexpire", KEYS[1], ARGV[2]) - else - return 0 - end - ` - - result, err := e.client.Eval(ctx, script, []string{e.keyName}, e.nodeID, e.config.TTL.Milliseconds()).Result() - if err != nil { - e.log.WithError(err).Error("Failed to renew leadership") - - common.LeaderElectionErrors.WithLabelValues(e.network, e.nodeID, "renew").Inc() - - return false - } - - val, ok := result.(int64) - if !ok { - e.log.WithError(fmt.Errorf("failed to renew leadership: %w", err)).Error("Failed to renew leadership") - +// tryExtend attempts to extend the leadership lock TTL. 
+func (e *RedisElector) tryExtend(ctx context.Context) bool { + ok, err := e.mutex.ExtendContext(ctx) + if err != nil || !ok { + e.log.WithError(err).Warn("Failed to extend leadership") common.LeaderElectionErrors.WithLabelValues(e.network, e.nodeID, "renew").Inc() return false } - success := val == 1 - - if !success { - e.log.Warn("Failed to renew leadership - lock not owned by this node") - - common.LeaderElectionErrors.WithLabelValues(e.network, e.nodeID, "renew").Inc() - } - - return success -} - -// releaseLeadership voluntarily gives up leadership. -func (e *RedisElector) releaseLeadership(ctx context.Context) error { - // Lua script to atomically check ownership and delete - script := ` - if redis.call("get", KEYS[1]) == ARGV[1] then - return redis.call("del", KEYS[1]) - else - return 0 - end - ` - - result, err := e.client.Eval(ctx, script, []string{e.keyName}, e.nodeID).Result() - if err != nil { - return fmt.Errorf("failed to release leadership: %w", err) - } - - val, ok := result.(int64) - if !ok { - return fmt.Errorf("failed to release leadership: %w", err) - } - - if val == 0 { - e.log.Warn("Could not release leadership - lock not owned by this node") - } else { - e.log.Info("Released leadership") - } - - e.mu.Lock() - e.isLeader = false - e.mu.Unlock() - - return nil + return true } -// handleLeadershipGain is called when leadership is acquired. -func (e *RedisElector) handleLeadershipGain() { - e.log.Info("Gained leadership") +// handleGain is called when leadership is acquired. +func (e *RedisElector) handleGain(ctx context.Context) { + e.callbacksMu.RLock() + callbacks := make([]LeadershipCallback, len(e.callbacks)) + copy(callbacks, e.callbacks) + e.callbacksMu.RUnlock() - select { - case e.leadershipChan <- true: - default: - e.log.Warn("Leadership channel full, dropping leadership gain event") + for _, cb := range callbacks { + cb(ctx, true) } } -// handleLeadershipLoss is called when leadership is lost. -func (e *RedisElector) handleLeadershipLoss() { +// handleLoss is called when leadership is lost. 
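One operational note on shutdown, with a hedged sketch: because the Stop path above releases the redsync mutex explicitly, stopping with a bounded context lets a standby instance take over without waiting for the lock TTL to expire. `shutdownElector` is a hypothetical helper.

```go
package consumer

import (
	"context"
	"time"

	"github.com/ethpandaops/execution-processor/pkg/leaderelection"
)

// shutdownElector stops the elector with a bounded context so the Redis lock
// is released promptly rather than left to expire via its TTL.
func shutdownElector(elector leaderelection.Elector) error {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	return elector.Stop(ctx)
}
```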
+func (e *RedisElector) handleLoss(ctx context.Context) { e.mu.Lock() wasLeader := e.isLeader e.isLeader = false - leadershipDuration := time.Since(e.leadershipStartTime).Seconds() + duration := time.Since(e.leadershipStartTime).Seconds() e.mu.Unlock() if wasLeader { common.LeaderElectionStatus.WithLabelValues(e.network, e.nodeID).Set(0) common.LeaderElectionTransitions.WithLabelValues(e.network, e.nodeID, "lost").Inc() - common.LeaderElectionDuration.WithLabelValues(e.network, e.nodeID).Observe(leadershipDuration) + common.LeaderElectionDuration.WithLabelValues(e.network, e.nodeID).Observe(duration) } e.log.Info("Lost leadership") - select { - case e.leadershipChan <- false: - default: - e.log.Warn("Leadership channel full, dropping leadership loss event") + e.callbacksMu.RLock() + callbacks := make([]LeadershipCallback, len(e.callbacks)) + copy(callbacks, e.callbacks) + e.callbacksMu.RUnlock() + + for _, cb := range callbacks { + cb(ctx, false) } } diff --git a/pkg/leaderelection/redis_election_test.go b/pkg/leaderelection/redis_election_test.go index b772413..803df04 100644 --- a/pkg/leaderelection/redis_election_test.go +++ b/pkg/leaderelection/redis_election_test.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "sync" + "sync/atomic" "testing" "time" @@ -105,48 +106,11 @@ func TestNewRedisElector(t *testing.T) { if elector.IsLeader() { t.Error("new elector should not be leader initially") } - - // Test that channel is created - ch := elector.LeadershipChannel() - if ch == nil { - t.Error("leadership channel should not be nil") - } } }) } } -func TestRedisElector_GetLeaderID_NoLeader(t *testing.T) { - client := newTestRedis(t) - - log := logrus.New() - log.SetLevel(logrus.ErrorLevel) - - // Clean up any existing keys - ctx := context.Background() - client.Del(ctx, "test:leader:no-leader") - - config := &leaderelection.Config{ - TTL: 5 * time.Second, - RenewalInterval: 1 * time.Second, - NodeID: "test-node", - } - - elector, err := leaderelection.NewRedisElector(client, log, "test:leader:no-leader", config) - if err != nil { - t.Fatalf("failed to create elector: %v", err) - } - - leaderID, err := elector.GetLeaderID() - if err == nil { - t.Error("expected error when no leader exists") - } - - if leaderID != "" { - t.Errorf("expected empty leader ID, got %s", leaderID) - } -} - func TestRedisElector_StartStop(t *testing.T) { client := newTestRedis(t) @@ -214,6 +178,15 @@ func TestRedisElector_LeadershipAcquisition(t *testing.T) { t.Fatalf("failed to create elector: %v", err) } + // Track leadership via callback + var leadershipGained atomic.Bool + + elector.OnLeadershipChange(func(_ context.Context, isLeader bool) { + if isLeader { + leadershipGained.Store(true) + } + }) + startCtx, startCancel := context.WithTimeout(context.Background(), 3*time.Second) defer startCancel() @@ -224,32 +197,15 @@ func TestRedisElector_LeadershipAcquisition(t *testing.T) { } // Wait for leadership acquisition - leadershipChan := elector.LeadershipChannel() - - select { - case isLeader := <-leadershipChan: - if !isLeader { - t.Error("expected to gain leadership") - } - case <-time.After(1 * time.Second): - t.Error("timeout waiting for leadership acquisition") - } + require.Eventually(t, func() bool { + return leadershipGained.Load() + }, 1*time.Second, 50*time.Millisecond, "should gain leadership via callback") // Verify leadership status if !elector.IsLeader() { t.Error("elector should be leader") } - // Verify leader ID - leaderID, err := elector.GetLeaderID() - if err != nil { - t.Errorf("failed to get leader 
ID: %v", err) - } - - if leaderID != config.NodeID { - t.Errorf("expected leader ID %s, got %s", config.NodeID, leaderID) - } - // Stop and clean up stopCtx, stopCancel := context.WithTimeout(context.Background(), 2*time.Second) defer stopCancel() @@ -363,6 +319,14 @@ func TestRedisElector_LeadershipTransition(t *testing.T) { t.Fatalf("failed to create elector1: %v", err) } + var elector1Gained atomic.Bool + + elector1.OnLeadershipChange(func(_ context.Context, isLeader bool) { + if isLeader { + elector1Gained.Store(true) + } + }) + startCtx, startCancel := context.WithTimeout(context.Background(), 5*time.Second) defer startCancel() @@ -373,7 +337,9 @@ func TestRedisElector_LeadershipTransition(t *testing.T) { } // Wait for leadership - time.Sleep(200 * time.Millisecond) + require.Eventually(t, func() bool { + return elector1Gained.Load() + }, 1*time.Second, 50*time.Millisecond, "elector1 should gain leadership") if !elector1.IsLeader() { t.Error("elector1 should be leader") @@ -400,6 +366,14 @@ func TestRedisElector_LeadershipTransition(t *testing.T) { t.Fatalf("failed to create elector2: %v", err) } + var elector2Gained atomic.Bool + + elector2.OnLeadershipChange(func(_ context.Context, isLeader bool) { + if isLeader { + elector2Gained.Store(true) + } + }) + // Start second elector err = elector2.Start(startCtx) if err != nil { @@ -407,31 +381,15 @@ func TestRedisElector_LeadershipTransition(t *testing.T) { } // Wait for leadership transition - leadershipChan := elector2.LeadershipChannel() - - select { - case isLeader := <-leadershipChan: - if !isLeader { - t.Error("elector2 should gain leadership") - } - case <-time.After(3 * time.Second): - t.Error("timeout waiting for leadership transition") - } + require.Eventually(t, func() bool { + return elector2Gained.Load() + }, 3*time.Second, 100*time.Millisecond, "elector2 should gain leadership") // Verify new leader if !elector2.IsLeader() { t.Error("elector2 should be leader") } - leaderID, err := elector2.GetLeaderID() - if err != nil { - t.Errorf("failed to get leader ID: %v", err) - } - - if leaderID != "node-2" { - t.Errorf("expected leader ID node-2, got %s", leaderID) - } - // Clean up stopCtx2, stopCancel2 := context.WithTimeout(context.Background(), 2*time.Second) defer stopCancel2() @@ -467,6 +425,19 @@ func TestRedisElector_RenewalFailure(t *testing.T) { elector, err := leaderelection.NewRedisElector(client, log, "test:leader:renewal-failure", config) require.NoError(t, err) + var ( + gained atomic.Bool + lost atomic.Bool + ) + + elector.OnLeadershipChange(func(_ context.Context, isLeader bool) { + if isLeader { + gained.Store(true) + } else { + lost.Store(true) + } + }) + startCtx, startCancel := context.WithTimeout(context.Background(), 2*time.Second) defer startCancel() @@ -475,27 +446,21 @@ func TestRedisElector_RenewalFailure(t *testing.T) { require.NoError(t, err) // Wait for leadership acquisition - leadershipChan := elector.LeadershipChannel() - select { - case isLeader := <-leadershipChan: - assert.True(t, isLeader, "Should gain leadership") - case <-time.After(1 * time.Second): - t.Fatal("Timeout waiting for leadership") - } + require.Eventually(t, func() bool { + return gained.Load() + }, 1*time.Second, 50*time.Millisecond, "Should gain leadership") // Verify leadership assert.True(t, elector.IsLeader()) - // Simulate external interference - delete the key - client.Del(ctx, "test:leader:renewal-failure") + // Simulate external interference - set a different value to simulate another node holding lock + // (just 
deleting doesn't work because WithSetNXOnExtend recreates the key) + client.Set(ctx, "test:leader:renewal-failure", "different-node-value", config.TTL) // Wait for leadership loss detection - select { - case isLeader := <-leadershipChan: - assert.False(t, isLeader, "Should lose leadership") - case <-time.After(2 * time.Second): - t.Fatal("Timeout waiting for leadership loss") - } + require.Eventually(t, func() bool { + return lost.Load() + }, 2*time.Second, 50*time.Millisecond, "Should lose leadership") // Verify leadership loss assert.False(t, elector.IsLeader()) @@ -530,7 +495,7 @@ func TestRedisElector_ConcurrentElectors(t *testing.T) { } // Create multiple electors - for i := 0; i < numElectors; i++ { + for i := range numElectors { nodeConfig := *config nodeConfig.NodeID = fmt.Sprintf("node-%d", i) @@ -545,6 +510,7 @@ func TestRedisElector_ConcurrentElectors(t *testing.T) { // Start all electors simultaneously var wg sync.WaitGroup + for i, elector := range electors { wg.Add(1) @@ -566,24 +532,15 @@ func TestRedisElector_ConcurrentElectors(t *testing.T) { // Exactly one should be leader leaderCount := 0 - var leader *leaderelection.RedisElector - for i, elector := range electors { if elector.IsLeader() { leaderCount++ - leader = elector t.Logf("Elector %d is leader", i) } } assert.Equal(t, 1, leaderCount, "Exactly one elector should be leader") - require.NotNil(t, leader) - - // Verify leader ID - leaderID, err := leader.GetLeaderID() - assert.NoError(t, err) - assert.NotEmpty(t, leaderID) // Stop all electors stopCtx, stopCancel := context.WithTimeout(context.Background(), 2*time.Second) @@ -732,10 +689,6 @@ func TestRedisElector_InvalidRedisAddress(t *testing.T) { isLeader := elector.IsLeader() assert.False(t, isLeader) - leaderID, err := elector.GetLeaderID() - assert.Error(t, err) // Should error due to connection failure - assert.Empty(t, leaderID) - // Stop should work even with connection issues stopCtx, stopCancel := context.WithTimeout(context.Background(), 1*time.Second) defer stopCancel() @@ -743,3 +696,380 @@ func TestRedisElector_InvalidRedisAddress(t *testing.T) { err = elector.Stop(stopCtx) assert.NoError(t, err) } + +// ===================================== +// CALLBACK-BASED NOTIFICATION TESTS +// ===================================== + +// TestOnLeadershipChange_CallbackInvocation verifies that registered callbacks +// are invoked when leadership status changes. 
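+// Leadership loss is forced by overwriting the lock with another node's value, because deleting the key alone lets the renewal loop re-create it.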
+func TestOnLeadershipChange_CallbackInvocation(t *testing.T) { + client := newTestRedis(t) + + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ctx := context.Background() + keyName := "test:leader:callback-invocation" + client.Del(ctx, keyName) + + config := &leaderelection.Config{ + TTL: 500 * time.Millisecond, + RenewalInterval: 100 * time.Millisecond, + NodeID: "callback-test-node", + } + + elector, err := leaderelection.NewRedisElector(client, log, keyName, config) + require.NoError(t, err) + + // Track callback invocations + var ( + callbackInvocations []bool + callbackMu sync.Mutex + ) + + elector.OnLeadershipChange(func(_ context.Context, isLeader bool) { + callbackMu.Lock() + defer callbackMu.Unlock() + + callbackInvocations = append(callbackInvocations, isLeader) + }) + + startCtx, startCancel := context.WithCancel(context.Background()) + defer startCancel() + + err = elector.Start(startCtx) + require.NoError(t, err) + + // Wait for leadership acquisition + time.Sleep(200 * time.Millisecond) + + // Should have gained leadership + require.True(t, elector.IsLeader(), "Should be leader") + + // Force leadership loss by setting a different value to simulate another node holding lock + // (just deleting doesn't work because WithSetNXOnExtend recreates the key) + client.Set(ctx, keyName, "different-node-value", config.TTL) + time.Sleep(300 * time.Millisecond) // Wait for renewal cycle to detect loss + + // Should have lost leadership + require.False(t, elector.IsLeader(), "Should have lost leadership") + + // Check callback invocations + callbackMu.Lock() + defer callbackMu.Unlock() + + require.GreaterOrEqual(t, len(callbackInvocations), 2, + "Should have at least 2 callback invocations (gain + loss)") + + // First should be gain (true), last should be loss (false) + assert.True(t, callbackInvocations[0], "First callback should be leadership gain") + assert.False(t, callbackInvocations[len(callbackInvocations)-1], "Last callback should be leadership loss") + + // Cleanup + stopCtx, stopCancel := context.WithTimeout(context.Background(), 1*time.Second) + defer stopCancel() + + _ = elector.Stop(stopCtx) +} + +// TestOnLeadershipChange_MultipleCallbacks verifies that multiple callbacks +// are all invoked in registration order. 
+func TestOnLeadershipChange_MultipleCallbacks(t *testing.T) { + client := newTestRedis(t) + + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ctx := context.Background() + keyName := "test:leader:multiple-callbacks" + client.Del(ctx, keyName) + + config := &leaderelection.Config{ + TTL: 500 * time.Millisecond, + RenewalInterval: 100 * time.Millisecond, + NodeID: "multi-callback-node", + } + + elector, err := leaderelection.NewRedisElector(client, log, keyName, config) + require.NoError(t, err) + + // Track callback order + var ( + invocationOrder []int + orderMu sync.Mutex + ) + + // Register multiple callbacks + for i := range 3 { + callbackID := i + + elector.OnLeadershipChange(func(_ context.Context, _ bool) { + orderMu.Lock() + defer orderMu.Unlock() + + invocationOrder = append(invocationOrder, callbackID) + }) + } + + startCtx, startCancel := context.WithCancel(context.Background()) + defer startCancel() + + err = elector.Start(startCtx) + require.NoError(t, err) + + // Wait for leadership acquisition + time.Sleep(200 * time.Millisecond) + + // Check invocation order + orderMu.Lock() + defer orderMu.Unlock() + + // Should have 3 invocations for the leadership gain + require.GreaterOrEqual(t, len(invocationOrder), 3, "All 3 callbacks should be invoked") + + // First 3 should be in order 0, 1, 2 + assert.Equal(t, 0, invocationOrder[0], "First callback should be invoked first") + assert.Equal(t, 1, invocationOrder[1], "Second callback should be invoked second") + assert.Equal(t, 2, invocationOrder[2], "Third callback should be invoked third") + + // Cleanup + stopCtx, stopCancel := context.WithTimeout(context.Background(), 1*time.Second) + defer stopCancel() + + _ = elector.Stop(stopCtx) +} + +// TestOnLeadershipChange_GuaranteedDelivery verifies that callbacks receive +// ALL leadership events, even when they would overflow a channel buffer. 
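+// The replaced channel-based API could drop events once its buffer filled; this test generates more transitions than that buffer (10) held to prove callbacks observe every one.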
+func TestOnLeadershipChange_GuaranteedDelivery(t *testing.T) { + client := newTestRedis(t) + + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ctx := context.Background() + keyName := "test:leader:callback-guaranteed" + client.Del(ctx, keyName) + + // Very fast renewal to generate many events + config := &leaderelection.Config{ + TTL: 200 * time.Millisecond, + RenewalInterval: 50 * time.Millisecond, + NodeID: "guaranteed-callback-node", + } + + elector, err := leaderelection.NewRedisElector(client, log, keyName, config) + require.NoError(t, err) + + // Track ALL callback invocations + var ( + callbackEvents []bool + eventsMu sync.Mutex + ) + + elector.OnLeadershipChange(func(_ context.Context, isLeader bool) { + eventsMu.Lock() + defer eventsMu.Unlock() + + callbackEvents = append(callbackEvents, isLeader) + }) + + startCtx, startCancel := context.WithCancel(context.Background()) + defer startCancel() + + err = elector.Start(startCtx) + require.NoError(t, err) + + // Wait for initial leadership + time.Sleep(100 * time.Millisecond) + + // Rapidly toggle leadership by setting different values and deleting + // This generates more events than a channel buffer (10) could hold + for range 15 { + // Set a different value to simulate another node taking the lock + client.Set(ctx, keyName, "different-node-value", config.TTL) + time.Sleep(60 * time.Millisecond) // Let it detect loss + + // Delete to allow re-acquire + client.Del(ctx, keyName) + time.Sleep(60 * time.Millisecond) // Let it re-acquire + } + + // Final - set different value to ensure not leader + client.Set(ctx, keyName, "different-node-value", config.TTL) + time.Sleep(100 * time.Millisecond) + + // Check final state + finalIsLeader := elector.IsLeader() + + eventsMu.Lock() + defer eventsMu.Unlock() + + t.Logf("Callback events received: %d, Final IsLeader(): %v", len(callbackEvents), finalIsLeader) + + // CRITICAL: Callback should have received many events + // More than the channel buffer size (10) proves guaranteed delivery + assert.Greater(t, len(callbackEvents), 10, + "Callbacks should receive more events than channel buffer size") + + // If final state is not leader, the last callback event must be false + if !finalIsLeader && len(callbackEvents) > 0 { + lastEvent := callbackEvents[len(callbackEvents)-1] + assert.False(t, lastEvent, + "CRITICAL: IsLeader()=false but last callback event was 'true' - events were lost!") + } + + // Cleanup + stopCtx, stopCancel := context.WithTimeout(context.Background(), 1*time.Second) + defer stopCancel() + + _ = elector.Stop(stopCtx) +} + +// TestOnLeadershipChange_SlowCallback verifies that slow callbacks work correctly +// (though they may delay leadership renewal). 
+func TestOnLeadershipChange_SlowCallback(t *testing.T) { + client := newTestRedis(t) + + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ctx := context.Background() + keyName := "test:leader:slow-callback" + client.Del(ctx, keyName) + + config := &leaderelection.Config{ + TTL: 1 * time.Second, + RenewalInterval: 200 * time.Millisecond, + NodeID: "slow-callback-node", + } + + elector, err := leaderelection.NewRedisElector(client, log, keyName, config) + require.NoError(t, err) + + // Track callback invocations with a slow callback + var ( + callbackEvents []bool + eventsMu sync.Mutex + ) + + elector.OnLeadershipChange(func(_ context.Context, isLeader bool) { + // Slow callback - takes 150ms (less than renewal interval for safety) + // This simulates a callback that does significant work + time.Sleep(150 * time.Millisecond) + + eventsMu.Lock() + defer eventsMu.Unlock() + + callbackEvents = append(callbackEvents, isLeader) + }) + + startCtx, startCancel := context.WithCancel(context.Background()) + defer startCancel() + + err = elector.Start(startCtx) + require.NoError(t, err) + + // Wait for leadership (callback takes 150ms, so wait longer) + time.Sleep(500 * time.Millisecond) + + // Should have gained leadership despite slow callback + require.True(t, elector.IsLeader(), "Should be leader") + + // Force leadership loss by setting a different value to simulate another node + client.Set(ctx, keyName, "different-node-value", config.TTL) + time.Sleep(600 * time.Millisecond) // Wait for renewal + slow callback + + // Should have lost leadership + require.False(t, elector.IsLeader(), "Should have lost leadership") + + // Check callback invocations + eventsMu.Lock() + defer eventsMu.Unlock() + + require.GreaterOrEqual(t, len(callbackEvents), 2, + "Should have at least 2 callback invocations despite slow callback") + + assert.True(t, callbackEvents[0], "First event should be leadership gain") + assert.False(t, callbackEvents[len(callbackEvents)-1], "Last event should be leadership loss") + + // Cleanup + stopCtx, stopCancel := context.WithTimeout(context.Background(), 1*time.Second) + defer stopCancel() + + _ = elector.Stop(stopCtx) +} + +// TestCallback_GuaranteedDelivery verifies that leadership loss events +// are ALWAYS delivered, even under contention. +// +// This is the contract that distributed systems depend on: +// If IsLeader() returns false, the consumer MUST have been notified. 
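+// A short grace period is allowed after each forced loss before asserting, since callback notification is asynchronous.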
+func TestCallback_GuaranteedDelivery(t *testing.T) { + client := newTestRedis(t) + + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + ctx := context.Background() + + // Run multiple iterations to catch race conditions + for iteration := range 10 { + keyName := "test:leader:guaranteed" + client.Del(ctx, keyName) + + config := &leaderelection.Config{ + TTL: 300 * time.Millisecond, + RenewalInterval: 50 * time.Millisecond, + NodeID: "guaranteed-node", + } + + elector, err := leaderelection.NewRedisElector(client, log, keyName, config) + require.NoError(t, err) + + startCtx, startCancel := context.WithCancel(context.Background()) + + // Track whether we've been notified of loss + var notifiedOfLoss atomic.Bool + + elector.OnLeadershipChange(func(_ context.Context, isLeader bool) { + if !isLeader { + notifiedOfLoss.Store(true) + } + }) + + err = elector.Start(startCtx) + require.NoError(t, err) + + // Wait for leadership + time.Sleep(100 * time.Millisecond) + + if elector.IsLeader() { + // Force leadership loss by setting a different value to simulate another node + client.Set(ctx, keyName, "different-node-value", config.TTL) + time.Sleep(150 * time.Millisecond) + + // If we're no longer leader, we MUST have been notified + if !elector.IsLeader() { + // Give callback a moment to process + time.Sleep(50 * time.Millisecond) + + if !notifiedOfLoss.Load() { + t.Fatalf("CRITICAL (iteration %d): IsLeader()=false but consumer was NOT notified of loss!", + iteration) + } + } + } + + // Cleanup + stopCtx, stopCancel := context.WithTimeout(context.Background(), 500*time.Millisecond) + + _ = elector.Stop(stopCtx) + + stopCancel() + startCancel() + + client.Del(ctx, keyName) + } +} diff --git a/pkg/processor/config.go b/pkg/processor/config.go index 47be50e..b0c6ad8 100644 --- a/pkg/processor/config.go +++ b/pkg/processor/config.go @@ -7,6 +7,7 @@ import ( "github.com/ethpandaops/execution-processor/pkg/processor/tracker" "github.com/ethpandaops/execution-processor/pkg/processor/transaction/simple" "github.com/ethpandaops/execution-processor/pkg/processor/transaction/structlog" + "github.com/ethpandaops/execution-processor/pkg/processor/transaction/structlog_agg" ) // Config holds the unified processor configuration. @@ -28,8 +29,9 @@ type Config struct { BackpressureHysteresis float64 `yaml:"backpressureHysteresis"` // Processor configurations - TransactionStructlog structlog.Config `yaml:"transactionStructlog"` - TransactionSimple simple.Config `yaml:"transactionSimple"` + TransactionStructlog structlog.Config `yaml:"transactionStructlog"` + TransactionSimple simple.Config `yaml:"transactionSimple"` + TransactionStructlogAgg structlog_agg.Config `yaml:"transactionStructlogAgg"` } // LeaderElectionConfig holds configuration for leader election. 
@@ -54,10 +56,7 @@ type WorkerConfig struct { } func (c *Config) Validate() error { - if c.Interval == 0 { - c.Interval = DefaultInterval - } - + // Interval 0 = no delay (default), >0 = fixed interval between processing cycles if c.Mode == "" { c.Mode = tracker.FORWARDS_MODE } @@ -112,5 +111,9 @@ func (c *Config) Validate() error { return fmt.Errorf("transaction simple config validation failed: %w", err) } + if err := c.TransactionStructlogAgg.Validate(); err != nil { + return fmt.Errorf("transaction structlog_agg config validation failed: %w", err) + } + return nil } diff --git a/pkg/processor/defaults.go b/pkg/processor/defaults.go index bd9e5dc..29e4f05 100644 --- a/pkg/processor/defaults.go +++ b/pkg/processor/defaults.go @@ -5,8 +5,9 @@ import "time" // Default configuration values for the processor manager. // Processor-specific defaults are in the tracker package. const ( - // DefaultInterval is the default interval between processing cycles. - DefaultInterval = 10 * time.Second + // DefaultNoWorkBackoff is the backoff duration when no work is available. + // Used in zero-interval mode to prevent CPU spin when idle. + DefaultNoWorkBackoff = 10 * time.Millisecond // DefaultConcurrency is the default number of concurrent workers for task processing. DefaultConcurrency = 20 @@ -24,4 +25,13 @@ const ( // DefaultLeaderRenewalInterval is the default renewal interval for leader election. DefaultLeaderRenewalInterval = 3 * time.Second + + // DefaultBackpressureBackoffMin is the minimum backoff duration when backpressure is detected. + DefaultBackpressureBackoffMin = 10 * time.Millisecond + + // DefaultBackpressureBackoffMax is the maximum backoff duration when backpressure persists. + DefaultBackpressureBackoffMax = 1 * time.Second + + // DefaultBackpressureJitterFraction is the fraction of backoff to add as random jitter (0.25 = 25%). 
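+	// For example, a backoff capped at 1s sleeps for a uniformly random duration in [1s, 1.25s).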
+ DefaultBackpressureJitterFraction = 0.25 ) diff --git a/pkg/processor/manager.go b/pkg/processor/manager.go index 01b771e..17d15f3 100644 --- a/pkg/processor/manager.go +++ b/pkg/processor/manager.go @@ -27,6 +27,7 @@ import ( "context" "fmt" "math/big" + "math/rand/v2" "strings" "sync" "time" @@ -37,6 +38,7 @@ import ( "github.com/ethpandaops/execution-processor/pkg/processor/tracker" transaction_simple "github.com/ethpandaops/execution-processor/pkg/processor/transaction/simple" transaction_structlog "github.com/ethpandaops/execution-processor/pkg/processor/transaction/structlog" + transaction_structlog_agg "github.com/ethpandaops/execution-processor/pkg/processor/transaction/structlog_agg" s "github.com/ethpandaops/execution-processor/pkg/state" "github.com/hibiken/asynq" r "github.com/redis/go-redis/v9" @@ -80,9 +82,8 @@ type Manager struct { network *ethereum.Network // Leader election - leaderElector leaderelection.Elector - isLeader bool - leadershipChange chan bool + leaderElector leaderelection.Elector + isLeader bool stopChan chan struct{} blockProcessStop chan struct{} @@ -101,6 +102,9 @@ type Manager struct { // Track queue high water marks queueHighWaterMarks map[string]int queueMetricsMutex sync.RWMutex + + // Backpressure backoff tracking + backpressureBackoff time.Duration } func NewManager(log logrus.FieldLogger, config *Config, pool *ethereum.Pool, state *s.Manager, redis *r.Client, redisPrefix string) (*Manager, error) { @@ -140,7 +144,6 @@ func NewManager(log logrus.FieldLogger, config *Config, pool *ethereum.Pool, sta redisPrefix: redisPrefix, asynqClient: asynqClient, asynqServer: asynqServer, - leadershipChange: make(chan bool, 1), stopChan: make(chan struct{}), blockProcessStop: make(chan struct{}), queueHighWaterMarks: make(map[string]int), @@ -150,6 +153,13 @@ func NewManager(log logrus.FieldLogger, config *Config, pool *ethereum.Pool, sta func (m *Manager) Start(ctx context.Context) error { m.log.Info("Starting processor manager") + // Start state manager (idempotent - safe to call even if already started by server.go). + // This ensures ClickHouse connections are established for embedded mode where + // the processor is started directly without the server wrapper. 
+ if err := m.state.Start(ctx); err != nil { + return fmt.Errorf("failed to start state manager: %w", err) + } + // wait for execution node to be healthy node, err := m.pool.WaitForHealthyExecutionNode(ctx) if err != nil { @@ -160,7 +170,7 @@ func (m *Manager) Start(ctx context.Context) error { return fmt.Errorf("no healthy execution node available") } - m.network, err = m.pool.GetNetworkByChainID(node.Metadata().ChainID()) + m.network, err = m.pool.GetNetworkByChainID(node.ChainID()) if err != nil { return fmt.Errorf("failed to get network by chain ID: %w", err) } @@ -209,21 +219,21 @@ func (m *Manager) Start(ctx context.Context) error { return fmt.Errorf("failed to create leader elector: %w", err) } + // Register callback for guaranteed leadership notification + m.leaderElector.OnLeadershipChange(func(ctx context.Context, isLeader bool) { + if isLeader { + m.handleLeadershipGain(ctx) + } else { + m.handleLeadershipLoss() + } + }) + // Start leader election if err := m.leaderElector.Start(ctx); err != nil { return fmt.Errorf("failed to start leader election: %w", err) } - // Monitor leadership changes - m.wg.Add(1) - - go func() { - defer m.wg.Done() - - m.monitorLeadership(ctx) - }() - - m.log.Debug("Leader election started, monitoring for leadership changes") + m.log.Debug("Leader election started with callback-based notification") } else { // If leader election is disabled, always act as leader m.isLeader = true @@ -430,6 +440,37 @@ func (m *Manager) initializeProcessors(ctx context.Context) error { m.log.Debug("Transaction simple processor is disabled") } + // Initialize transaction structlog_agg processor if enabled + if m.config.TransactionStructlogAgg.Enabled { + m.log.Debug("Transaction structlog_agg processor is enabled, initializing...") + + processor, err := transaction_structlog_agg.New(&transaction_structlog_agg.Dependencies{ + Log: m.log.WithField("processor", "transaction_structlog_agg"), + Pool: m.pool, + State: m.state, + AsynqClient: m.asynqClient, + RedisClient: m.redisClient, + Network: m.network, + RedisPrefix: m.redisPrefix, + }, &m.config.TransactionStructlogAgg) + if err != nil { + return fmt.Errorf("failed to create transaction_structlog_agg processor: %w", err) + } + + m.processors["transaction_structlog_agg"] = processor + + // Set processing mode from config + processor.SetProcessingMode(m.config.Mode) + + m.log.WithField("processor", "transaction_structlog_agg").Info("Initialized processor") + + if err := m.startProcessorWithRetry(ctx, processor, "transaction_structlog_agg"); err != nil { + return fmt.Errorf("failed to start transaction_structlog_agg processor: %w", err) + } + } else { + m.log.Debug("Transaction structlog_agg processor is disabled") + } + m.log.WithField("total_processors", len(m.processors)).Info("Completed processor initialization") return nil @@ -478,7 +519,9 @@ func (m *Manager) startProcessorWithRetry(ctx context.Context, processor tracker } } -func (m *Manager) processBlocks(ctx context.Context) { +// processBlocks processes the next block for all registered processors. +// Returns true if any processor successfully processed a block, false otherwise. 
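+// The return value drives the zero-interval loop in runBlockProcessing: successful work resets the idle backoff, while no work grows it exponentially (with jitter) up to the configured maximum.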
+func (m *Manager) processBlocks(ctx context.Context) bool { m.log.WithField("processor_count", len(m.processors)).Debug("Starting to process blocks") // Check if we should skip due to queue backpressure @@ -488,7 +531,7 @@ func (m *Manager) processBlocks(ctx context.Context) { "max_queue_size": m.config.MaxProcessQueueSize, }).Warn("Skipping block processing due to queue backpressure") - return + return false } // Get execution node head for head distance calculation @@ -501,6 +544,8 @@ func (m *Manager) processBlocks(ctx context.Context) { } } + workDone := false + for name, processor := range m.processors { m.log.WithField("processor", name).Debug("Processing next block for processor") @@ -516,6 +561,8 @@ func (m *Manager) processBlocks(ctx context.Context) { common.ProcessorErrors.WithLabelValues(m.network.Name, name, "process_block", "processing").Inc() } else { + workDone = true + // Track processing duration duration := time.Since(startTime) @@ -531,6 +578,8 @@ func (m *Manager) processBlocks(ctx context.Context) { // Update head distance metric (regardless of success/failure to track current distance) m.updateHeadDistanceMetric(ctx, name, executionHead) } + + return workDone } // updateHeadDistanceMetric calculates and updates the head distance metric for a processor. @@ -577,35 +626,6 @@ func (m *Manager) setupWorkerHandlers() (*asynq.ServeMux, error) { return mux, nil } -func (m *Manager) monitorLeadership(ctx context.Context) { - m.log.Debug("Started monitoring leadership changes") - - leadershipChan := m.leaderElector.LeadershipChannel() - - for { - select { - case <-ctx.Done(): - m.log.Debug("Context cancelled in monitorLeadership") - - return - case isLeader, ok := <-leadershipChan: - if !ok { - m.log.Debug("Leadership channel closed") - - return - } - - m.log.WithField("isLeader", isLeader).Debug("Received leadership change event") - - if isLeader { - m.handleLeadershipGain(ctx) - } else { - m.handleLeadershipLoss() - } - } - } -} - func (m *Manager) handleLeadershipGain(ctx context.Context) { m.blockProcessMutex.Lock() defer m.blockProcessMutex.Unlock() @@ -681,10 +701,7 @@ func (m *Manager) runBlockProcessing(ctx context.Context) { m.log.Debug("Context is active, proceeding with block processing") } - blockTicker := time.NewTicker(m.config.Interval) queueMonitorTicker := time.NewTicker(30 * time.Second) // Monitor queues every 30s - - defer blockTicker.Stop() defer queueMonitorTicker.Stop() m.log.WithFields(logrus.Fields{ @@ -721,24 +738,65 @@ func (m *Manager) runBlockProcessing(ctx context.Context) { m.log.Info("Block processing stopped") return - case <-blockTicker.C: - // Only process if we're still the leader - if m.isLeader { - m.log.Debug("Block processing ticker fired") - m.processBlocks(ctx) - } else { + case <-queueMonitorTicker.C: + m.log.Debug("Queue monitoring ticker fired") + m.startQueueMonitoring(ctx) + default: + if !m.isLeader { m.log.Warn("No longer leader but block processing still running - stopping") return } - case <-queueMonitorTicker.C: - // Monitor queue health - m.log.Debug("Queue monitoring ticker fired") - m.startQueueMonitoring(ctx) + + workDone := m.processBlocks(ctx) + + if m.config.Interval > 0 { + // Fixed interval mode - always sleep the configured duration + time.Sleep(m.config.Interval) + m.backpressureBackoff = 0 // Reset backoff in fixed interval mode + } else if !workDone { + // Zero interval mode with no work - exponential backoff with jitter + m.backpressureBackoff = m.calculateBackpressureBackoff(m.backpressureBackoff) + 
time.Sleep(m.backpressureBackoff) + } else { + // Zero interval with work done - reset backoff and loop immediately + m.backpressureBackoff = 0 + } } } } +// calculateBackpressureBackoff calculates the next backoff duration using exponential backoff with jitter. +func (m *Manager) calculateBackpressureBackoff(current time.Duration) time.Duration { + var next time.Duration + + if current == 0 { + // Start with minimum backoff + next = DefaultBackpressureBackoffMin + } else { + // Double the current backoff (exponential) + next = current * 2 + } + + // Cap at maximum backoff + if next > DefaultBackpressureBackoffMax { + next = DefaultBackpressureBackoffMax + } + + // Add jitter (up to 25% of the backoff duration) + //nolint:gosec // G404: Weak RNG is fine for backoff jitter - no security requirement + jitter := time.Duration(rand.Float64() * DefaultBackpressureJitterFraction * float64(next)) + next += jitter + + m.log.WithFields(logrus.Fields{ + "previous_backoff": current, + "next_backoff": next, + "jitter": jitter, + }).Debug("Calculated backpressure backoff") + + return next +} + // monitorQueues monitors queue health and archived items. func (m *Manager) monitorQueues(ctx context.Context) { // Get Redis options for Asynq Inspector @@ -1070,7 +1128,7 @@ func (m *Manager) shouldSkipBlockProcessing(ctx context.Context) (bool, string) // GetQueueName returns the current queue name based on processing mode. func (m *Manager) GetQueueName() string { // For now we only have one processor - processorName := "transaction-structlog" + processorName := "transaction_structlog" if m.config.Mode == tracker.BACKWARDS_MODE { return tracker.PrefixedProcessBackwardsQueue(processorName, m.redisPrefix) } @@ -1122,6 +1180,13 @@ func (m *Manager) QueueBlockManually(ctx context.Context, processorName string, return nil, fmt.Errorf("failed to enqueue block task for block %d: %w", blockNumber, err) } + case *transaction_structlog_agg.Processor: + // Enqueue transaction tasks using the processor's method + tasksCreated, err = p.EnqueueTransactionTasks(ctx, block) + if err != nil { + return nil, fmt.Errorf("failed to enqueue tasks for block %d: %w", blockNumber, err) + } + default: return nil, fmt.Errorf("processor %s has unsupported type", processorName) } diff --git a/pkg/processor/manager_test.go b/pkg/processor/manager_test.go index c21c97d..06be9c9 100644 --- a/pkg/processor/manager_test.go +++ b/pkg/processor/manager_test.go @@ -3,6 +3,7 @@ package processor_test import ( "context" "sync" + "sync/atomic" "testing" "time" @@ -587,3 +588,372 @@ func TestManager_ConcurrentConfiguration(t *testing.T) { wg.Wait() } + +// ===================================== +// LEADERSHIP TRANSITION RACE CONDITION TESTS +// ===================================== + +const testLeaderKey = "test-prefix:leader:test:forwards" + +// createTestManager is a helper to create a manager for leadership tests. 
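+// It uses deliberately short TTL and renewal intervals so leadership can be gained and lost quickly, and leaves processors disabled to keep the tests focused on election behaviour.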
+func createTestManager(t *testing.T, leaderEnabled bool) (*processor.Manager, *redis.Client) { + t.Helper() + + log := logrus.New() + log.SetLevel(logrus.ErrorLevel) + + config := &processor.Config{ + Interval: 50 * time.Millisecond, + Mode: "forwards", + Concurrency: 2, + LeaderElection: processor.LeaderElectionConfig{ + Enabled: leaderEnabled, + TTL: 500 * time.Millisecond, + RenewalInterval: 100 * time.Millisecond, + NodeID: "race-test-node", + }, + TransactionStructlog: structlog.Config{ + Enabled: false, + }, + } + + redisClient := newTestRedis(t) + + poolConfig := ðereum.Config{ + Execution: []*execution.Config{ + { + Name: "test-node", + NodeAddress: "http://localhost:8545", + }, + }, + } + pool := ethereum.NewPool(log.WithField("component", "pool"), "test", poolConfig) + + stateConfig := &state.Config{ + Storage: state.StorageConfig{ + Config: clickhouse.Config{ + Addr: "localhost:9000", + }, + Table: "test_leadership_race_blocks", + }, + Limiter: state.LimiterConfig{ + Enabled: false, + }, + } + + stateManager, err := state.NewManager(log.WithField("component", "state"), stateConfig) + require.NoError(t, err) + + manager, err := processor.NewManager(log, config, pool, stateManager, redisClient, "test-prefix") + require.NoError(t, err) + + return manager, redisClient +} + +// TestManager_ConcurrentStopAndLeadershipLoss tests that Stop() and leadership loss +// don't race to close the same blockProcessStop channel. +// +// This test should expose a potential panic if both paths try to close the same channel. +// The bug is in manager.go where both Stop() and handleLeadershipLoss() have: +// +// if m.blockProcessStop != nil { +// select { +// case <-m.blockProcessStop: +// default: +// close(m.blockProcessStop) +// } +// } +// +// If they race, one can close while the other is checking, causing a panic. 
+func TestManager_ConcurrentStopAndLeadershipLoss(t *testing.T) { + // Run multiple times to increase chance of hitting the race + for iteration := range 20 { + t.Run("iteration", func(t *testing.T) { + manager, redisClient := createTestManager(t, true) + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + // Track panics + var panicCount atomic.Int32 + + // Start the manager + startDone := make(chan struct{}) + + go func() { + defer func() { + if r := recover(); r != nil { + panicCount.Add(1) + t.Logf("Iteration %d: PANIC in Start: %v", iteration, r) + } + + close(startDone) + }() + + _ = manager.Start(ctx) + }() + + // Wait for manager to start and potentially gain leadership + time.Sleep(200 * time.Millisecond) + + // Now trigger concurrent stop and leadership loss + var wg sync.WaitGroup + + // Goroutine 1: Call Stop() + wg.Add(1) + + go func() { + defer func() { + if r := recover(); r != nil { + panicCount.Add(1) + t.Logf("Iteration %d: PANIC in Stop: %v", iteration, r) + } + + wg.Done() + }() + + stopCtx, stopCancel := context.WithTimeout(context.Background(), 1*time.Second) + defer stopCancel() + + _ = manager.Stop(stopCtx) + }() + + // Goroutine 2: Simulate leadership loss by deleting the Redis key + wg.Add(1) + + go func() { + defer func() { + if r := recover(); r != nil { + panicCount.Add(1) + t.Logf("Iteration %d: PANIC in leadership loss: %v", iteration, r) + } + + wg.Done() + }() + + // Delete leader key to trigger leadership loss + redisClient.Del(context.Background(), "test-prefix:leader:test:forwards") + }() + + wg.Wait() + <-startDone + + // Check for panics + if panicCount.Load() > 0 { + t.Fatalf("Iteration %d: Detected %d panic(s) during concurrent stop/leadership loss", + iteration, panicCount.Load()) + } + }) + } +} + +// TestManager_RapidLeadershipFlipping tests that rapid leadership gain/loss cycles +// don't cause orphaned goroutines or channel leaks. +// +// The bug: handleLeadershipGain() creates a new blockProcessStop channel each time. +// If leadership flips rapidly, old runBlockProcessing goroutines may be orphaned +// because they're waiting on the old channel that's now unreachable. 
+func TestManager_RapidLeadershipFlipping(t *testing.T) { + manager, redisClient := createTestManager(t, true) + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + // Start the manager + startDone := make(chan struct{}) + + go func() { + defer close(startDone) + + _ = manager.Start(ctx) + }() + + // Wait for initial startup + time.Sleep(200 * time.Millisecond) + + // Rapidly flip leadership 20 times + for i := range 20 { + // Delete key to force leadership loss + redisClient.Del(context.Background(), testLeaderKey) + time.Sleep(30 * time.Millisecond) + + // The manager should re-acquire leadership automatically + // (since it's the only instance) + time.Sleep(150 * time.Millisecond) + + t.Logf("Flip %d complete", i) + } + + // Now stop - this should NOT hang if goroutines are properly managed + stopDone := make(chan struct{}) + + go func() { + defer close(stopDone) + + stopCtx, stopCancel := context.WithTimeout(context.Background(), 3*time.Second) + defer stopCancel() + + err := manager.Stop(stopCtx) + if err != nil { + t.Logf("Stop error: %v", err) + } + }() + + // Wait for stop with timeout - if it hangs, goroutines are orphaned + select { + case <-stopDone: + t.Log("Manager stopped successfully") + case <-time.After(5 * time.Second): + t.Fatal("CRITICAL: Manager.Stop() hung - likely orphaned goroutines waiting on unreachable channels") + } + + cancel() + <-startDone +} + +// TestManager_StopDuringLeadershipTransition tests stopping the manager +// while a leadership transition is in progress. +// +// This can expose races where Stop() tries to close blockProcessStop +// while handleLeadershipGain() is creating a new one. +func TestManager_StopDuringLeadershipTransition(t *testing.T) { + for iteration := range 10 { + t.Run("iteration", func(t *testing.T) { + manager, redisClient := createTestManager(t, true) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + var panicCount atomic.Int32 + + // Start manager + startDone := make(chan struct{}) + + go func() { + defer func() { + if r := recover(); r != nil { + panicCount.Add(1) + t.Logf("Iteration %d: PANIC in Start: %v", iteration, r) + } + + close(startDone) + }() + + _ = manager.Start(ctx) + }() + + // Wait for startup + time.Sleep(150 * time.Millisecond) + + // Start rapid leadership flipping in background + flipDone := make(chan struct{}) + + go func() { + defer close(flipDone) + + for i := 0; i < 10; i++ { + redisClient.Del(context.Background(), testLeaderKey) + time.Sleep(20 * time.Millisecond) + } + }() + + // While flipping, call Stop + time.Sleep(50 * time.Millisecond) + + stopDone := make(chan struct{}) + + go func() { + defer func() { + if r := recover(); r != nil { + panicCount.Add(1) + t.Logf("Iteration %d: PANIC in Stop: %v", iteration, r) + } + + close(stopDone) + }() + + stopCtx, stopCancel := context.WithTimeout(context.Background(), 2*time.Second) + defer stopCancel() + + _ = manager.Stop(stopCtx) + }() + + // Wait for everything + <-flipDone + + select { + case <-stopDone: + // Good + case <-time.After(3 * time.Second): + t.Fatalf("Iteration %d: Stop hung during leadership transition", iteration) + } + + cancel() + <-startDone + + if panicCount.Load() > 0 { + t.Fatalf("Iteration %d: Detected %d panic(s)", iteration, panicCount.Load()) + } + }) + } +} + +// TestManager_WaitGroupLeakOnRapidTransitions verifies that the WaitGroup +// count stays balanced during rapid leadership transitions. 
+// +// The bug: handleLeadershipGain() calls wg.Add(1) and spawns a goroutine. +// If the goroutine doesn't properly exit (e.g., stuck on old channel), +// wg.Wait() in Stop() will hang forever. +func TestManager_WaitGroupLeakOnRapidTransitions(t *testing.T) { + manager, redisClient := createTestManager(t, true) + + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + + startDone := make(chan struct{}) + + go func() { + defer close(startDone) + + _ = manager.Start(ctx) + }() + + time.Sleep(200 * time.Millisecond) + + // Do many rapid transitions + for i := range 30 { + redisClient.Del(context.Background(), testLeaderKey) + time.Sleep(20 * time.Millisecond) + + // Let it re-acquire + time.Sleep(120 * time.Millisecond) + + if i%10 == 0 { + t.Logf("Completed %d transitions", i) + } + } + + // Stop with a strict timeout + stopComplete := make(chan error, 1) + + go func() { + stopCtx, stopCancel := context.WithTimeout(context.Background(), 2*time.Second) + defer stopCancel() + + stopComplete <- manager.Stop(stopCtx) + }() + + select { + case err := <-stopComplete: + if err != nil { + t.Logf("Stop returned error: %v", err) + } + + t.Log("Stop completed - WaitGroup balanced correctly") + case <-time.After(5 * time.Second): + t.Fatal("CRITICAL: Stop hung - WaitGroup leak detected (orphaned goroutines)") + } + + cancel() + <-startDone +} diff --git a/pkg/processor/tracker/limiter.go b/pkg/processor/tracker/limiter.go index 82c3f3b..cbfb01d 100644 --- a/pkg/processor/tracker/limiter.go +++ b/pkg/processor/tracker/limiter.go @@ -2,6 +2,7 @@ package tracker import ( "context" + "fmt" "github.com/ethpandaops/execution-processor/pkg/common" "github.com/sirupsen/logrus" @@ -116,3 +117,91 @@ func (l *Limiter) IsBlockedByIncompleteBlocks(ctx context.Context, nextBlock uin return false, nil } + +// GetAvailableCapacity returns how many more blocks can be enqueued before hitting +// the maxPendingBlockRange limit. Returns 0 if at or over capacity. 
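+// Capacity is maxPendingBlockRange minus the distance between nextBlock and the oldest (forwards mode) or newest (backwards mode) incomplete block.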
+func (l *Limiter) GetAvailableCapacity(ctx context.Context, nextBlock uint64, mode string) (int, error) { + if l.config.MaxPendingBlockRange <= 0 { + // No limit configured, return max capacity + return l.config.MaxPendingBlockRange, nil + } + + maxPendingBlockRange := uint64(l.config.MaxPendingBlockRange) //nolint:gosec // validated above + + if mode == BACKWARDS_MODE { + // Backwards mode: check distance from newest incomplete block + searchMaxBlock := nextBlock + maxPendingBlockRange + + newestIncomplete, err := l.stateProvider.GetNewestIncompleteBlock( + ctx, l.network, l.processor, searchMaxBlock, + ) + if err != nil { + return 0, err + } + + if newestIncomplete == nil { + // No incomplete blocks, full capacity available + return l.config.MaxPendingBlockRange, nil + } + + distance := *newestIncomplete - nextBlock + if distance >= maxPendingBlockRange { + return 0, nil + } + + //nolint:gosec // Result is bounded by MaxPendingBlockRange which is an int + return int(maxPendingBlockRange - distance), nil + } + + // Forwards mode: check distance from oldest incomplete block + var searchMinBlock uint64 + if nextBlock > maxPendingBlockRange { + searchMinBlock = nextBlock - maxPendingBlockRange + } + + oldestIncomplete, err := l.stateProvider.GetOldestIncompleteBlock( + ctx, l.network, l.processor, searchMinBlock, + ) + if err != nil { + return 0, err + } + + if oldestIncomplete == nil { + // No incomplete blocks, full capacity available + return l.config.MaxPendingBlockRange, nil + } + + distance := nextBlock - *oldestIncomplete + if distance >= maxPendingBlockRange { + return 0, nil + } + + //nolint:gosec // Result is bounded by MaxPendingBlockRange which is an int + return int(maxPendingBlockRange - distance), nil +} + +// ValidateBatchWithinLeash ensures a batch of blocks won't exceed the maxPendingBlockRange. +// Returns an error if the batch would violate the constraint. +func (l *Limiter) ValidateBatchWithinLeash(ctx context.Context, startBlock uint64, count int, mode string) error { + if l.config.MaxPendingBlockRange <= 0 || count <= 0 { + return nil + } + + // The batch spans from startBlock to startBlock + count - 1 + // We need to ensure this range doesn't exceed maxPendingBlockRange + if count > l.config.MaxPendingBlockRange { + return fmt.Errorf("batch size %d exceeds max pending block range %d", count, l.config.MaxPendingBlockRange) + } + + // Check available capacity + capacity, err := l.GetAvailableCapacity(ctx, startBlock, mode) + if err != nil { + return fmt.Errorf("failed to get available capacity: %w", err) + } + + if count > capacity { + return fmt.Errorf("batch size %d exceeds available capacity %d", count, capacity) + } + + return nil +} diff --git a/pkg/processor/tracker/pending.go b/pkg/processor/tracker/pending.go index 446cfa6..48a7afe 100644 --- a/pkg/processor/tracker/pending.go +++ b/pkg/processor/tracker/pending.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "strconv" + "time" "github.com/redis/go-redis/v9" "github.com/sirupsen/logrus" @@ -36,16 +37,35 @@ func (t *PendingTracker) blockKey(blockNumber uint64, network, processor, mode s return fmt.Sprintf("%s:block:%s:%s:%s:%d", t.prefix, network, processor, mode, blockNumber) } +// ErrBlockAlreadyBeingProcessed is returned when attempting to initialize a block that is already being processed. +var ErrBlockAlreadyBeingProcessed = fmt.Errorf("block is already being processed") + // InitBlock initializes tracking for a block with the given task count. -// This should be called after enqueueing all tasks for a block. 
+// Uses SetNX to ensure only one processor can claim a block at a time. +// Returns ErrBlockAlreadyBeingProcessed if the block is already being tracked. +// This should be called BEFORE MarkBlockEnqueued to prevent race conditions. func (t *PendingTracker) InitBlock(ctx context.Context, blockNumber uint64, taskCount int, network, processor, mode string) error { key := t.blockKey(blockNumber, network, processor, mode) - err := t.redis.Set(ctx, key, taskCount, 0).Err() + // Use SetNX to atomically check-and-set - only succeeds if key doesn't exist + // TTL of 30 minutes prevents orphaned keys if processor crashes + wasSet, err := t.redis.SetNX(ctx, key, taskCount, 30*time.Minute).Result() if err != nil { return fmt.Errorf("failed to init block tracking: %w", err) } + if !wasSet { + t.log.WithFields(logrus.Fields{ + "block_number": blockNumber, + "network": network, + "processor": processor, + "mode": mode, + "key": key, + }).Debug("Block already being processed by another worker") + + return ErrBlockAlreadyBeingProcessed + } + t.log.WithFields(logrus.Fields{ "block_number": blockNumber, "task_count": taskCount, @@ -74,7 +94,7 @@ func (t *PendingTracker) DecrementPending(ctx context.Context, blockNumber uint6 "network": network, "processor": processor, "mode": mode, - }).Debug("Decremented pending task count") + }).Trace("Decremented pending task count") return remaining, nil } @@ -120,3 +140,76 @@ func (t *PendingTracker) CleanupBlock(ctx context.Context, blockNumber uint64, n return nil } + +// BlockInit contains the information needed to initialize a block for tracking. +type BlockInit struct { + Number uint64 + TaskCount int +} + +// InitBlocks initializes tracking for multiple blocks atomically via Redis pipeline. +// Uses SetNX to ensure only one processor can claim each block at a time. +// Returns the block numbers that were successfully initialized (those not already being processed). 
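+// The pipeline batches all SetNX calls into a single round trip; each SetNX is individually atomic, and blocks whose keys already exist are skipped rather than treated as errors.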
+func (t *PendingTracker) InitBlocks( + ctx context.Context, + blocks []BlockInit, + network, processor, mode string, +) ([]uint64, error) { + if len(blocks) == 0 { + return []uint64{}, nil + } + + // Use pipeline for atomic batch operation + pipe := t.redis.Pipeline() + cmds := make([]*redis.BoolCmd, len(blocks)) + + for i, block := range blocks { + key := t.blockKey(block.Number, network, processor, mode) + // SetNX with 30 minute TTL to prevent orphaned keys + cmds[i] = pipe.SetNX(ctx, key, block.TaskCount, 30*time.Minute) + } + + // Execute pipeline + _, err := pipe.Exec(ctx) + if err != nil && err != redis.Nil { + return nil, fmt.Errorf("failed to execute init blocks pipeline: %w", err) + } + + // Collect successfully initialized block numbers + initialized := make([]uint64, 0, len(blocks)) + + for i, cmd := range cmds { + wasSet, cmdErr := cmd.Result() + if cmdErr != nil && cmdErr != redis.Nil { + t.log.WithError(cmdErr).WithFields(logrus.Fields{ + "block_number": blocks[i].Number, + "network": network, + "processor": processor, + "mode": mode, + }).Warn("Failed to check SetNX result for block") + + continue + } + + if wasSet { + initialized = append(initialized, blocks[i].Number) + + t.log.WithFields(logrus.Fields{ + "block_number": blocks[i].Number, + "task_count": blocks[i].TaskCount, + "network": network, + "processor": processor, + "mode": mode, + }).Debug("Initialized block tracking") + } else { + t.log.WithFields(logrus.Fields{ + "block_number": blocks[i].Number, + "network": network, + "processor": processor, + "mode": mode, + }).Debug("Block already being processed by another worker") + } + } + + return initialized, nil +} diff --git a/pkg/processor/tracker/processor.go b/pkg/processor/tracker/processor.go index 6e86737..b199c51 100644 --- a/pkg/processor/tracker/processor.go +++ b/pkg/processor/tracker/processor.go @@ -65,13 +65,6 @@ const ( // incomplete block and the current block before blocking new block processing. DefaultMaxPendingBlockRange = 2 - // DefaultChunkSize is the default number of rows per ClickHouse insert batch. - DefaultChunkSize = 10000 - - // DefaultProgressLogThreshold is the default threshold for logging progress - // on large transactions (structlog processor). - DefaultProgressLogThreshold = 100000 - // DefaultClickHouseTimeout is the default timeout for ClickHouse operations. DefaultClickHouseTimeout = 30 * time.Second diff --git a/pkg/processor/transaction/simple/block_processing.go b/pkg/processor/transaction/simple/block_processing.go index ac6dc35..b406c3d 100644 --- a/pkg/processor/transaction/simple/block_processing.go +++ b/pkg/processor/transaction/simple/block_processing.go @@ -10,11 +10,14 @@ import ( "github.com/sirupsen/logrus" "github.com/ethpandaops/execution-processor/pkg/common" + "github.com/ethpandaops/execution-processor/pkg/ethereum/execution" "github.com/ethpandaops/execution-processor/pkg/processor/tracker" "github.com/ethpandaops/execution-processor/pkg/state" ) -// ProcessNextBlock processes the next available block. +// ProcessNextBlock processes the next available block(s). +// In zero-interval mode, this attempts to fetch and process multiple blocks +// up to the available capacity for improved throughput. 
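+// If capacity or batch lookups fail, processing falls back to a single block, so an error here degrades throughput rather than halting progress.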
func (p *Processor) ProcessNextBlock(ctx context.Context) error { p.log.WithField("network", p.network.Name).Debug("Querying for next block to process") @@ -31,7 +34,7 @@ func (p *Processor) ProcessNextBlock(ctx context.Context) error { chainHead = new(big.Int).SetUint64(*latestBlockNum) } - // Get next block to process from state manager + // Get next block to determine starting point nextBlock, err := p.stateManager.NextBlock(ctx, p.Name(), p.network.Name, p.processingMode, chainHead) if err != nil { if errors.Is(err, state.ErrNoMoreBlocks) { @@ -49,24 +52,93 @@ func (p *Processor) ProcessNextBlock(ctx context.Context) error { return nil } - // Distance-based pending block range check - // Only allow processing if distance between oldest incomplete and next block < maxPendingBlockRange - blocked, err := p.IsBlockedByIncompleteBlocks(ctx, nextBlock.Uint64(), p.processingMode) + // Get available capacity for batch processing + capacity, err := p.GetAvailableCapacity(ctx, nextBlock.Uint64(), p.processingMode) if err != nil { - p.log.WithError(err).Warn("Failed to check incomplete blocks distance, proceeding anyway") - } else if blocked { + p.log.WithError(err).Warn("Failed to get available capacity, falling back to single block") + + capacity = 1 + } + + if capacity <= 0 { + p.log.Debug("No capacity available, waiting for tasks to complete") + return nil } + // Get batch of block numbers + blockNumbers, err := p.stateManager.NextBlocks(ctx, p.Name(), p.network.Name, p.processingMode, chainHead, capacity) + if err != nil { + p.log.WithError(err).Warn("Failed to get batch of block numbers, falling back to single block") + + blockNumbers = []*big.Int{nextBlock} + } + + if len(blockNumbers) == 0 { + p.log.Debug("No blocks to process") + + return nil + } + + // Validate batch won't exceed leash + if validateErr := p.ValidateBatchWithinLeash(ctx, blockNumbers[0].Uint64(), len(blockNumbers), p.processingMode); validateErr != nil { + p.log.WithError(validateErr).Warn("Batch validation failed, reducing to single block") + + blockNumbers = blockNumbers[:1] + } + + // Fetch blocks using batch RPC + blocks, err := node.BlocksByNumbers(ctx, blockNumbers) + if err != nil { + p.log.WithError(err).WithField("network", p.network.Name).Error("could not fetch blocks") + + return err + } + + if len(blocks) == 0 { + // No blocks returned - might be at chain tip + return p.handleBlockNotFound(ctx, nextBlock) + } + + p.log.WithFields(logrus.Fields{ + "requested": len(blockNumbers), + "received": len(blocks), + "network": p.network.Name, + }).Debug("Fetched batch of blocks") + + // Process each block, stopping on first error + for _, block := range blocks { + if processErr := p.processBlock(ctx, block); processErr != nil { + return processErr + } + } + + return nil +} + +// handleBlockNotFound handles the case when a block is not found. +func (p *Processor) handleBlockNotFound(_ context.Context, nextBlock *big.Int) error { p.log.WithFields(logrus.Fields{ "block_number": nextBlock.String(), "network": p.network.Name, - }).Debug("Found next block to process") + }).Debug("Block not yet available") + + return fmt.Errorf("block %s not yet available", nextBlock.String()) +} + +// processBlock processes a single block - the core logic extracted from the original ProcessNextBlock. 
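+// Empty blocks are marked complete immediately; otherwise a single task is enqueued, the block is claimed in Redis via the pending tracker, and it is marked enqueued in the state manager.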
+func (p *Processor) processBlock(ctx context.Context, block execution.Block) error { + blockNumber := block.Number() + + p.log.WithFields(logrus.Fields{ + "block_number": blockNumber.String(), + "network": p.network.Name, + }).Debug("Processing block") // Check if this block was recently processed recentlyProcessed, err := p.stateManager.IsBlockRecentlyProcessed( ctx, - nextBlock.Uint64(), + blockNumber.Uint64(), p.network.Name, p.Name(), 10, @@ -76,37 +148,22 @@ func (p *Processor) ProcessNextBlock(ctx context.Context) error { } if recentlyProcessed { - p.log.WithField("block", nextBlock.Uint64()).Debug("Block was recently processed, skipping") + p.log.WithField("block", blockNumber.Uint64()).Debug("Block was recently processed, skipping") common.BlockProcessingSkipped.WithLabelValues(p.network.Name, p.Name(), "recently_processed").Inc() - return fmt.Errorf("block %d was recently processed", nextBlock.Uint64()) - } - - // Get block data - block, err := node.BlockByNumber(ctx, nextBlock) - if err != nil { - if tracker.IsBlockNotFoundError(err) { - p.log.WithFields(logrus.Fields{ - "block_number": nextBlock.String(), - "network": p.network.Name, - }).Debug("Block not yet available") - - return fmt.Errorf("block %s not yet available", nextBlock.String()) - } - - return fmt.Errorf("failed to get block %d: %w", nextBlock.Uint64(), err) + return fmt.Errorf("block %d was recently processed", blockNumber.Uint64()) } // Handle empty blocks - mark complete immediately (no task tracking needed) if len(block.Transactions()) == 0 { - p.log.WithField("block", nextBlock.Uint64()).Debug("Empty block, marking as complete") + p.log.WithField("block", blockNumber.Uint64()).Debug("Empty block, marking as complete") - return p.stateManager.MarkBlockComplete(ctx, nextBlock.Uint64(), p.network.Name, p.Name()) + return p.stateManager.MarkBlockComplete(ctx, blockNumber.Uint64(), p.network.Name, p.Name()) } // Enqueue block processing task payload := &ProcessPayload{ - BlockNumber: *nextBlock, + BlockNumber: *blockNumber, NetworkName: p.network.Name, ProcessingMode: p.processingMode, } @@ -134,27 +191,27 @@ func (p *Processor) ProcessNextBlock(ctx context.Context) error { common.TasksEnqueued.WithLabelValues(p.network.Name, ProcessorName, queue, task.Type()).Inc() // Initialize block tracking in Redis (1 task per block for simple processor) - if err := p.pendingTracker.InitBlock(ctx, nextBlock.Uint64(), 1, p.network.Name, p.Name(), p.processingMode); err != nil { + if err := p.pendingTracker.InitBlock(ctx, blockNumber.Uint64(), 1, p.network.Name, p.Name(), p.processingMode); err != nil { p.log.WithError(err).WithFields(logrus.Fields{ "network": p.network.Name, - "block_number": nextBlock, + "block_number": blockNumber, }).Error("could not init block tracking in Redis") return err } // Mark block as enqueued (phase 1 of two-phase completion) - if err := p.stateManager.MarkBlockEnqueued(ctx, nextBlock.Uint64(), 1, p.network.Name, p.Name()); err != nil { + if err := p.stateManager.MarkBlockEnqueued(ctx, blockNumber.Uint64(), 1, p.network.Name, p.Name()); err != nil { p.log.WithError(err).WithFields(logrus.Fields{ "network": p.network.Name, - "block_number": nextBlock, + "block_number": blockNumber, }).Error("could not mark block as enqueued") return err } p.log.WithFields(logrus.Fields{ - "block_number": nextBlock.Uint64(), + "block_number": blockNumber.Uint64(), "tx_count": len(block.Transactions()), }).Info("Enqueued block for processing") diff --git a/pkg/processor/transaction/simple/config.go 
b/pkg/processor/transaction/simple/config.go index 6896310..d379e67 100644 --- a/pkg/processor/transaction/simple/config.go +++ b/pkg/processor/transaction/simple/config.go @@ -2,16 +2,27 @@ package simple import ( "fmt" + "time" "github.com/ethpandaops/execution-processor/pkg/clickhouse" ) +// Default buffer configuration values. +const ( + DefaultBufferMaxRows = 100000 + DefaultBufferFlushInterval = time.Second +) + // Config holds configuration for the simple transaction processor. type Config struct { clickhouse.Config `yaml:",inline"` Enabled bool `yaml:"enabled"` Table string `yaml:"table"` + // Row buffer settings for batched ClickHouse inserts + BufferMaxRows int `yaml:"bufferMaxRows"` // Max rows before flush. Default: 100000 + BufferFlushInterval time.Duration `yaml:"bufferFlushInterval"` // Max time before flush. Default: 1s + // Block completion tracking MaxPendingBlockRange int `yaml:"maxPendingBlockRange"` // Max distance between oldest incomplete and current block. Default: 2 } diff --git a/pkg/processor/transaction/simple/handlers.go b/pkg/processor/transaction/simple/handlers.go index f33eef6..0e21b7b 100644 --- a/pkg/processor/transaction/simple/handlers.go +++ b/pkg/processor/transaction/simple/handlers.go @@ -7,11 +7,11 @@ import ( "time" "github.com/ClickHouse/ch-go" - "github.com/ethereum/go-ethereum/core/types" "github.com/hibiken/asynq" "github.com/sirupsen/logrus" "github.com/ethpandaops/execution-processor/pkg/common" + "github.com/ethpandaops/execution-processor/pkg/ethereum/execution" "github.com/ethpandaops/execution-processor/pkg/processor/tracker" ) @@ -107,13 +107,13 @@ func (p *Processor) handleProcessTask(ctx context.Context, task *asynq.Task) err blockTxs := block.Transactions() // Build receipt map - try batch first, fall back to per-tx - receiptMap := make(map[string]*types.Receipt, len(blockTxs)) + receiptMap := make(map[string]execution.Receipt, len(blockTxs)) receipts, err := node.BlockReceipts(ctx, blockNumber) if err == nil { // Use batch receipts for _, r := range receipts { - receiptMap[r.TxHash.Hex()] = r + receiptMap[r.TxHash().Hex()] = r } } @@ -179,26 +179,13 @@ func (p *Processor) handleProcessTask(ctx context.Context, task *asynq.Task) err // buildTransactionRow builds a transaction row from block, tx, and receipt data. 
func (p *Processor) buildTransactionRow( - block *types.Block, - tx *types.Transaction, - receipt *types.Receipt, + block execution.Block, + tx execution.Transaction, + receipt execution.Receipt, index uint64, ) (Transaction, error) { - // Get sender (from) - handle legacy transactions without chain ID - var signer types.Signer - - chainID := tx.ChainId() - if chainID == nil || chainID.Sign() == 0 { - // Legacy transaction without EIP-155 replay protection - signer = types.HomesteadSigner{} - } else { - signer = types.LatestSignerForChainID(chainID) - } - - from, err := types.Sender(signer, tx) - if err != nil { - return Transaction{}, fmt.Errorf("failed to get sender: %w", err) - } + // Get sender (from) - computed by the data source + from := tx.From() // Build to address (nil for contract creation) var toAddress *string @@ -269,7 +256,7 @@ func (p *Processor) buildTransactionRow( Size: txSize, CallDataSize: callDataSize, BlobHashes: []string{}, // Default empty array - Success: receipt.Status == 1, + Success: receipt.Status() == 1, NInputBytes: callDataSize, NInputZeroBytes: nInputZeroBytes, NInputNonzeroBytes: nInputNonzeroBytes, @@ -277,7 +264,7 @@ func (p *Processor) buildTransactionRow( } // Handle blob transaction fields (type 3) - if tx.Type() == types.BlobTxType { + if tx.Type() == execution.BlobTxType { blobGas := tx.BlobGas() txRow.BlobGas = &blobGas @@ -301,11 +288,11 @@ func (p *Processor) buildTransactionRow( // calculateEffectiveGasPrice calculates the effective gas price for a transaction. // For legacy/access list txs: returns tx.GasPrice(). // For EIP-1559+ txs: returns min(max_fee_per_gas, base_fee + max_priority_fee_per_gas). -func calculateEffectiveGasPrice(block *types.Block, tx *types.Transaction) *big.Int { +func calculateEffectiveGasPrice(block execution.Block, tx execution.Transaction) *big.Int { txType := tx.Type() // Legacy and access list transactions use GasPrice directly - if txType == types.LegacyTxType || txType == types.AccessListTxType { + if txType == execution.LegacyTxType || txType == execution.AccessListTxType { if tx.GasPrice() != nil { return tx.GasPrice() } @@ -333,8 +320,8 @@ func calculateEffectiveGasPrice(block *types.Block, tx *types.Transaction) *big. return effectiveGasPrice } -// insertTransactions inserts transactions into ClickHouse using columnar protocol. -func (p *Processor) insertTransactions(ctx context.Context, transactions []Transaction) error { +// flushRows is the flush function for the row buffer. +func (p *Processor) flushRows(ctx context.Context, transactions []Transaction) error { if len(transactions) == 0 { return nil } @@ -344,6 +331,7 @@ func (p *Processor) insertTransactions(ctx context.Context, transactions []Trans defer cancel() cols := NewColumns() + for _, tx := range transactions { cols.Append(tx) } @@ -375,3 +363,8 @@ func (p *Processor) insertTransactions(ctx context.Context, transactions []Trans return nil } + +// insertTransactions submits transactions to the row buffer for batched insertion. 
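+//
+// Illustrative sketch of the buffer semantics, based only on the calls this
+// processor makes (other rowbuffer.Config fields such as Network, Processor and
+// Table are omitted here):
+//
+//	buf := rowbuffer.New(rowbuffer.Config{MaxRows: 100000, FlushInterval: time.Second}, p.flushRows, log)
+//	_ = buf.Start(ctx)        // begin the background flush loop
+//	_ = buf.Submit(ctx, rows) // rows accumulate; flushRows runs once MaxRows or FlushInterval is hit
+//	_ = buf.Stop(ctx)         // drains and flushes anything still buffered on shutdown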
+func (p *Processor) insertTransactions(ctx context.Context, transactions []Transaction) error { + return p.rowBuffer.Submit(ctx, transactions) +} diff --git a/pkg/processor/transaction/simple/processor.go b/pkg/processor/transaction/simple/processor.go index ebd8de5..145279d 100644 --- a/pkg/processor/transaction/simple/processor.go +++ b/pkg/processor/transaction/simple/processor.go @@ -12,6 +12,7 @@ import ( "github.com/ethpandaops/execution-processor/pkg/clickhouse" "github.com/ethpandaops/execution-processor/pkg/ethereum" "github.com/ethpandaops/execution-processor/pkg/processor/tracker" + "github.com/ethpandaops/execution-processor/pkg/rowbuffer" "github.com/ethpandaops/execution-processor/pkg/state" ) @@ -45,6 +46,9 @@ type Processor struct { redisPrefix string pendingTracker *tracker.PendingTracker + // Row buffer for batched ClickHouse inserts + rowBuffer *rowbuffer.Buffer[Transaction] + // Embedded limiter for shared blocking/completion logic *tracker.Limiter } @@ -70,6 +74,15 @@ func New(deps *Dependencies, config *Config) (*Processor, error) { config.MaxPendingBlockRange = tracker.DefaultMaxPendingBlockRange } + // Set buffer defaults + if config.BufferMaxRows <= 0 { + config.BufferMaxRows = DefaultBufferMaxRows + } + + if config.BufferFlushInterval <= 0 { + config.BufferFlushInterval = DefaultBufferFlushInterval + } + log := deps.Log.WithField("processor", ProcessorName) pendingTracker := tracker.NewPendingTracker(deps.RedisClient, deps.RedisPrefix, log) @@ -87,7 +100,7 @@ func New(deps *Dependencies, config *Config) (*Processor, error) { }, ) - return &Processor{ + processor := &Processor{ log: log, pool: deps.Pool, stateManager: deps.State, @@ -99,7 +112,29 @@ func New(deps *Dependencies, config *Config) (*Processor, error) { redisPrefix: deps.RedisPrefix, pendingTracker: pendingTracker, Limiter: limiter, - }, nil + } + + // Create the row buffer with the flush function + processor.rowBuffer = rowbuffer.New( + rowbuffer.Config{ + MaxRows: config.BufferMaxRows, + FlushInterval: config.BufferFlushInterval, + Network: deps.Network.Name, + Processor: ProcessorName, + Table: config.Table, + }, + processor.flushRows, + log, + ) + + processor.log.WithFields(logrus.Fields{ + "network": processor.network.Name, + "max_pending_block_range": config.MaxPendingBlockRange, + "buffer_max_rows": config.BufferMaxRows, + "buffer_flush_interval": config.BufferFlushInterval, + }).Info("Simple transaction processor initialized") + + return processor, nil } // Name returns the processor name. 
@@ -115,6 +150,11 @@ func (p *Processor) Start(ctx context.Context) error { return fmt.Errorf("failed to start ClickHouse client: %w", err) } + // Start the row buffer + if err := p.rowBuffer.Start(ctx); err != nil { + return fmt.Errorf("failed to start row buffer: %w", err) + } + p.log.WithFields(logrus.Fields{ "network": p.network.Name, }).Info("Transaction simple processor ready") @@ -126,6 +166,11 @@ func (p *Processor) Start(ctx context.Context) error { func (p *Processor) Stop(ctx context.Context) error { p.log.Info("Stopping transaction simple processor") + // Stop the row buffer first (flushes remaining rows) + if err := p.rowBuffer.Stop(ctx); err != nil { + p.log.WithError(err).Error("Failed to stop row buffer") + } + return p.clickhouse.Stop() } diff --git a/pkg/processor/transaction/structlog/block_processing.go b/pkg/processor/transaction/structlog/block_processing.go index 55141a7..8873d36 100644 --- a/pkg/processor/transaction/structlog/block_processing.go +++ b/pkg/processor/transaction/structlog/block_processing.go @@ -6,16 +6,18 @@ import ( "fmt" "math/big" - "github.com/ethereum/go-ethereum/core/types" "github.com/hibiken/asynq" "github.com/sirupsen/logrus" "github.com/ethpandaops/execution-processor/pkg/common" + "github.com/ethpandaops/execution-processor/pkg/ethereum/execution" "github.com/ethpandaops/execution-processor/pkg/processor/tracker" "github.com/ethpandaops/execution-processor/pkg/state" ) -// ProcessNextBlock processes the next available block. +// ProcessNextBlock processes the next available block(s). +// In zero-interval mode, this attempts to fetch and process multiple blocks +// up to the available capacity for improved throughput. func (p *Processor) ProcessNextBlock(ctx context.Context) error { p.log.WithField("network", p.network.Name).Debug("Querying for next block to process") @@ -33,10 +35,9 @@ func (p *Processor) ProcessNextBlock(ctx context.Context) error { } } - // Get next block to process from admin.execution_block table + // Get next block to determine starting point nextBlock, err := p.stateManager.NextBlock(ctx, p.Name(), p.network.Name, p.processingMode, chainHead) if err != nil { - // Check if this is the "no more blocks" condition if errors.Is(err, state.ErrNoMoreBlocks) { p.log.Debug("no more blocks to process") @@ -54,87 +55,128 @@ func (p *Processor) ProcessNextBlock(ctx context.Context) error { return nil } - // Distance-based pending block range check - // Only allow processing if distance between oldest incomplete and next block < maxPendingBlockRange - blocked, err := p.IsBlockedByIncompleteBlocks(ctx, nextBlock.Uint64(), p.processingMode) + // Get available capacity for batch processing + capacity, err := p.GetAvailableCapacity(ctx, nextBlock.Uint64(), p.processingMode) if err != nil { - p.log.WithError(err).Warn("Failed to check incomplete blocks distance, proceeding anyway") - } else if blocked { + p.log.WithError(err).Warn("Failed to get available capacity, falling back to single block") + + capacity = 1 + } + + if capacity <= 0 { + p.log.Debug("No capacity available, waiting for tasks to complete") + return nil } - p.log.WithFields(logrus.Fields{ - "block_number": nextBlock.String(), - "network": p.network.Name, - }).Debug("Found next block to process") + // Get batch of block numbers + blockNumbers, err := p.stateManager.NextBlocks(ctx, p.Name(), p.network.Name, p.processingMode, chainHead, capacity) + if err != nil { + p.log.WithError(err).Warn("Failed to get batch of block numbers, falling back to single block") - // 
Check if this block was recently processed to avoid rapid reprocessing - if recentlyProcessed, checkErr := p.stateManager.IsBlockRecentlyProcessed(ctx, nextBlock.Uint64(), p.network.Name, p.Name(), 30); checkErr == nil && recentlyProcessed { - p.log.WithFields(logrus.Fields{ - "block_number": nextBlock.String(), - "network": p.network.Name, - }).Debug("Block was recently processed, skipping to prevent rapid reprocessing") + blockNumbers = []*big.Int{nextBlock} + } - common.BlockProcessingSkipped.WithLabelValues(p.network.Name, p.Name(), "recently_processed").Inc() + if len(blockNumbers) == 0 { + p.log.Debug("No blocks to process") return nil } - // Update block metrics - p.updateBlockMetrics(ctx, nextBlock) - - // Get healthy execution node - node = p.pool.GetHealthyExecutionNode() - if node == nil { - p.log.WithField("network", p.network.Name).Error("could not get healthy node") + // Validate batch won't exceed leash + if validateErr := p.ValidateBatchWithinLeash(ctx, blockNumbers[0].Uint64(), len(blockNumbers), p.processingMode); validateErr != nil { + p.log.WithError(validateErr).Warn("Batch validation failed, reducing to single block") - return fmt.Errorf("no healthy execution node available") + blockNumbers = blockNumbers[:1] } - // Get block data - block, err := node.BlockByNumber(ctx, nextBlock) + // Fetch blocks using batch RPC + blocks, err := node.BlocksByNumbers(ctx, blockNumbers) if err != nil { - // Check if this is a "not found" error indicating we're at head - if tracker.IsBlockNotFoundError(err) { - // Check if we're close to chain tip to determine if this is expected - if latestBlock, latestErr := node.BlockNumber(ctx); latestErr == nil && latestBlock != nil { - chainTip := new(big.Int).SetUint64(*latestBlock) - diff := new(big.Int).Sub(nextBlock, chainTip).Int64() - - if diff <= 5 { // Within 5 blocks of chain tip - p.log.WithFields(logrus.Fields{ - "network": p.network.Name, - "block_number": nextBlock, - "chain_tip": chainTip, - "blocks_ahead": diff, - }).Info("Waiting for block to be available on execution node") - - return fmt.Errorf("block %s not yet available (chain tip: %s)", nextBlock, chainTip) - } - } + p.log.WithError(err).WithField("network", p.network.Name).Error("could not fetch blocks") + + return err + } + + if len(blocks) == 0 { + // No blocks returned - might be at chain tip + return p.handleBlockNotFound(ctx, node, nextBlock) + } + + p.log.WithFields(logrus.Fields{ + "requested": len(blockNumbers), + "received": len(blocks), + "network": p.network.Name, + }).Debug("Fetched batch of blocks") + + // Process each block, stopping on first error + for _, block := range blocks { + if processErr := p.processBlock(ctx, block); processErr != nil { + return processErr } + } + + return nil +} + +// handleBlockNotFound handles the case when a block is not found. 
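+// If the requested block is within 5 blocks of the node's reported chain tip the
+// miss is treated as expected (the node has simply not imported it yet) and logged
+// at info level; in all other cases the block is reported as not found.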
+func (p *Processor) handleBlockNotFound(ctx context.Context, node execution.Node, nextBlock *big.Int) error { + // Check if we're close to chain tip to determine if this is expected + if latestBlock, latestErr := node.BlockNumber(ctx); latestErr == nil && latestBlock != nil { + chainTip := new(big.Int).SetUint64(*latestBlock) + diff := new(big.Int).Sub(nextBlock, chainTip).Int64() - // Log as error for non-head cases or when we can't determine chain tip - p.log.WithError(err).WithFields(logrus.Fields{ + if diff <= 5 { // Within 5 blocks of chain tip + p.log.WithFields(logrus.Fields{ + "network": p.network.Name, + "block_number": nextBlock, + "chain_tip": chainTip, + "blocks_ahead": diff, + }).Info("Waiting for block to be available on execution node") + + return fmt.Errorf("block %s not yet available (chain tip: %s)", nextBlock, chainTip) + } + } + + return fmt.Errorf("block %s not found", nextBlock) +} + +// processBlock processes a single block - the core logic extracted from the original ProcessNextBlock. +func (p *Processor) processBlock(ctx context.Context, block execution.Block) error { + blockNumber := block.Number() + + // Check if this block was recently processed to avoid rapid reprocessing + if recentlyProcessed, checkErr := p.stateManager.IsBlockRecentlyProcessed(ctx, blockNumber.Uint64(), p.network.Name, p.Name(), 30); checkErr == nil && recentlyProcessed { + p.log.WithFields(logrus.Fields{ + "block_number": blockNumber.String(), "network": p.network.Name, - "block_number": nextBlock, - }).Error("could not get block") + }).Debug("Block was recently processed, skipping to prevent rapid reprocessing") - return err + common.BlockProcessingSkipped.WithLabelValues(p.network.Name, p.Name(), "recently_processed").Inc() + + return nil } + // Update lightweight block height metric + common.BlockHeight.WithLabelValues(p.network.Name, ProcessorName).Set(float64(blockNumber.Int64())) + + p.log.WithFields(logrus.Fields{ + "block_number": blockNumber.String(), + "network": p.network.Name, + }).Debug("Processing block") + // Handle empty blocks - mark complete immediately (no task tracking needed) if len(block.Transactions()) == 0 { p.log.WithFields(logrus.Fields{ "network": p.network.Name, - "block_number": nextBlock, + "block_number": blockNumber, }).Debug("skipping empty block") // Mark the block as complete immediately (no tasks to track) - if markErr := p.stateManager.MarkBlockComplete(ctx, nextBlock.Uint64(), p.network.Name, p.Name()); markErr != nil { + if markErr := p.stateManager.MarkBlockComplete(ctx, blockNumber.Uint64(), p.network.Name, p.Name()); markErr != nil { p.log.WithError(markErr).WithFields(logrus.Fields{ "network": p.network.Name, - "block_number": nextBlock, + "block_number": blockNumber, }).Error("could not mark empty block as complete") return markErr @@ -143,35 +185,63 @@ func (p *Processor) ProcessNextBlock(ctx context.Context) error { return nil } - // Enqueue tasks for each transaction - taskCount, err := p.EnqueueTransactionTasks(ctx, block) - if err != nil { - return fmt.Errorf("failed to enqueue transaction tasks: %w", err) - } + // Calculate expected task count before enqueueing + expectedTaskCount := len(block.Transactions()) - // Initialize block tracking in Redis - if err := p.pendingTracker.InitBlock(ctx, nextBlock.Uint64(), taskCount, p.network.Name, p.Name(), p.processingMode); err != nil { - p.log.WithError(err).WithFields(logrus.Fields{ + // Acquire exclusive lock on this block via Redis FIRST + if initErr := p.pendingTracker.InitBlock(ctx, 
blockNumber.Uint64(), expectedTaskCount, p.network.Name, p.Name(), p.processingMode); initErr != nil { + // If block is already being processed by another worker, skip gracefully + if errors.Is(initErr, tracker.ErrBlockAlreadyBeingProcessed) { + p.log.WithFields(logrus.Fields{ + "network": p.network.Name, + "block_number": blockNumber, + }).Debug("Block already being processed by another worker, skipping") + + common.BlockProcessingSkipped.WithLabelValues(p.network.Name, p.Name(), "already_processing").Inc() + + return nil + } + + p.log.WithError(initErr).WithFields(logrus.Fields{ "network": p.network.Name, - "block_number": nextBlock, + "block_number": blockNumber, }).Error("could not init block tracking in Redis") - return err + return initErr } - // Mark the block as enqueued (phase 1 of two-phase completion) - if err := p.stateManager.MarkBlockEnqueued(ctx, nextBlock.Uint64(), taskCount, p.network.Name, p.Name()); err != nil { - p.log.WithError(err).WithFields(logrus.Fields{ + // Mark the block as enqueued AFTER acquiring Redis lock (phase 1 of two-phase completion) + if markErr := p.stateManager.MarkBlockEnqueued(ctx, blockNumber.Uint64(), expectedTaskCount, p.network.Name, p.Name()); markErr != nil { + p.log.WithError(markErr).WithFields(logrus.Fields{ "network": p.network.Name, - "block_number": nextBlock, + "block_number": blockNumber, }).Error("could not mark block as enqueued") - return err + // Clean up Redis lock since we failed to mark in ClickHouse + _ = p.pendingTracker.CleanupBlock(ctx, blockNumber.Uint64(), p.network.Name, p.Name(), p.processingMode) + + return markErr + } + + // Enqueue tasks for each transaction LAST + taskCount, err := p.EnqueueTransactionTasks(ctx, block) + if err != nil { + return fmt.Errorf("failed to enqueue transaction tasks: %w", err) + } + + // Log warning if actual count differs from expected + if taskCount != expectedTaskCount { + p.log.WithFields(logrus.Fields{ + "network": p.network.Name, + "block_number": blockNumber, + "expected_task_count": expectedTaskCount, + "actual_task_count": taskCount, + }).Warn("task count mismatch - some tasks may have failed to enqueue") } p.log.WithFields(logrus.Fields{ "network": p.network.Name, - "block_number": nextBlock, + "block_number": blockNumber, "tx_count": len(block.Transactions()), "task_count": taskCount, }).Info("enqueued block for processing") @@ -179,9 +249,8 @@ func (p *Processor) ProcessNextBlock(ctx context.Context) error { return nil } -// enqueueTransactionTasks enqueues tasks for all transactions in a block. // EnqueueTransactionTasks enqueues transaction processing tasks for a given block. -func (p *Processor) EnqueueTransactionTasks(ctx context.Context, block *types.Block) (int, error) { +func (p *Processor) EnqueueTransactionTasks(ctx context.Context, block execution.Block) (int, error) { var enqueuedCount int var errs []error @@ -246,34 +315,3 @@ func (p *Processor) EnqueueTransactionTasks(ctx context.Context, block *types.Bl return enqueuedCount, nil } - -// updateBlockMetrics updates block-related metrics. 
-func (p *Processor) updateBlockMetrics(ctx context.Context, blockNumber *big.Int) { - // Update block height - common.BlockHeight.WithLabelValues(p.network.Name, ProcessorName).Set(float64(blockNumber.Int64())) - - // Update blocks stored min/max - minBlock, maxBlock, err := p.stateManager.GetMinMaxStoredBlocks(ctx, p.network.Name, ProcessorName) - if err != nil { - p.log.WithError(err).WithField("network", p.network.Name).Debug("failed to get min/max stored blocks") - } else if minBlock != nil && maxBlock != nil { - common.BlocksStored.WithLabelValues(p.network.Name, ProcessorName, "min").Set(float64(minBlock.Int64())) - common.BlocksStored.WithLabelValues(p.network.Name, ProcessorName, "max").Set(float64(maxBlock.Int64())) - } - - // Update head distance metric - node := p.pool.GetHealthyExecutionNode() - if node != nil { - if latestBlockNum, err := node.BlockNumber(ctx); err == nil && latestBlockNum != nil { - executionHead := new(big.Int).SetUint64(*latestBlockNum) - - distance, headType, err := p.stateManager.GetHeadDistance(ctx, ProcessorName, p.network.Name, p.processingMode, executionHead) - if err != nil { - p.log.WithError(err).Debug("Failed to calculate head distance in processor metrics") - common.HeadDistance.WithLabelValues(p.network.Name, ProcessorName, "error").Set(-1) - } else { - common.HeadDistance.WithLabelValues(p.network.Name, ProcessorName, headType).Set(float64(distance)) - } - } - } -} diff --git a/pkg/processor/transaction/structlog/call_tracker.go b/pkg/processor/transaction/structlog/call_tracker.go new file mode 100644 index 0000000..805eb51 --- /dev/null +++ b/pkg/processor/transaction/structlog/call_tracker.go @@ -0,0 +1,81 @@ +package structlog + +// CallFrame represents a single call frame in the EVM execution. +type CallFrame struct { + ID uint32 // Sequential frame ID within the transaction + Depth uint64 // EVM depth level +} + +// CallTracker tracks call frames during EVM opcode traversal. +// It assigns sequential frame IDs as calls are entered and maintains +// the current path from root to the active frame. +type CallTracker struct { + stack []CallFrame // Stack of active call frames + nextID uint32 // Next frame ID to assign + path []uint32 // Current path from root to active frame +} + +// NewCallTracker creates a new CallTracker initialized with the root frame. +// The root frame has ID 0 and Depth 1, matching EVM structlog traces where +// execution starts at depth 1 (not 0). +func NewCallTracker() *CallTracker { + return &CallTracker{ + stack: []CallFrame{{ID: 0, Depth: 1}}, + nextID: 1, + path: []uint32{0}, + } +} + +// ProcessDepthChange processes a depth change and returns the current frame ID and path. +// Call this for each opcode with the opcode's depth value. 
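+//
+// Example (illustrative; mirrors the traced sequence exercised in the tests):
+//
+//	ct := NewCallTracker()
+//	id, path := ct.ProcessDepthChange(1) // id=0, path=[0]   root frame
+//	id, path = ct.ProcessDepthChange(2)  // id=1, path=[0,1] entered a call
+//	id, path = ct.ProcessDepthChange(2)  // id=1, path=[0,1] still inside it
+//	id, path = ct.ProcessDepthChange(1)  // id=0, path=[0]   returned to root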
+func (ct *CallTracker) ProcessDepthChange(newDepth uint64) (frameID uint32, framePath []uint32) { + currentDepth := ct.stack[len(ct.stack)-1].Depth + + if newDepth > currentDepth { + // Entering new call frame + newFrame := CallFrame{ID: ct.nextID, Depth: newDepth} + ct.stack = append(ct.stack, newFrame) + ct.path = append(ct.path, ct.nextID) + ct.nextID++ + } else if newDepth < currentDepth { + // Returning from call(s) - pop frames until depth matches + for len(ct.stack) > 1 && ct.stack[len(ct.stack)-1].Depth > newDepth { + ct.stack = ct.stack[:len(ct.stack)-1] + ct.path = ct.path[:len(ct.path)-1] + } + } + + // Return current frame info (copy path to avoid mutation issues) + pathCopy := make([]uint32, len(ct.path)) + copy(pathCopy, ct.path) + + return ct.stack[len(ct.stack)-1].ID, pathCopy +} + +// IssueFrameID allocates the next frame ID without processing a depth change. +// Used for synthetic frames (e.g., EOA calls that don't increase depth). +// Returns the new frame ID and the path for the synthetic child frame. +func (ct *CallTracker) IssueFrameID() (frameID uint32, framePath []uint32) { + newID := ct.nextID + ct.nextID++ + + // Path for synthetic frame is current path + new ID + pathCopy := make([]uint32, len(ct.path)+1) + copy(pathCopy, ct.path) + pathCopy[len(ct.path)] = newID + + return newID, pathCopy +} + +// CurrentFrameID returns the current frame ID without processing a depth change. +func (ct *CallTracker) CurrentFrameID() uint32 { + return ct.stack[len(ct.stack)-1].ID +} + +// CurrentPath returns a copy of the current path. +func (ct *CallTracker) CurrentPath() []uint32 { + pathCopy := make([]uint32, len(ct.path)) + copy(pathCopy, ct.path) + + return pathCopy +} diff --git a/pkg/processor/transaction/structlog/call_tracker_test.go b/pkg/processor/transaction/structlog/call_tracker_test.go new file mode 100644 index 0000000..5c30408 --- /dev/null +++ b/pkg/processor/transaction/structlog/call_tracker_test.go @@ -0,0 +1,662 @@ +package structlog + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNewCallTracker(t *testing.T) { + ct := NewCallTracker() + + assert.Equal(t, uint32(0), ct.CurrentFrameID()) + assert.Equal(t, []uint32{0}, ct.CurrentPath()) +} + +func TestCallTracker_SameDepth(t *testing.T) { + ct := NewCallTracker() + + // All opcodes at depth 1 should stay in frame 0 (root) + frameID, path := ct.ProcessDepthChange(1) + assert.Equal(t, uint32(0), frameID) + assert.Equal(t, []uint32{0}, path) + + frameID, path = ct.ProcessDepthChange(1) + assert.Equal(t, uint32(0), frameID) + assert.Equal(t, []uint32{0}, path) + + frameID, path = ct.ProcessDepthChange(1) + assert.Equal(t, uint32(0), frameID) + assert.Equal(t, []uint32{0}, path) +} + +func TestCallTracker_SingleCall(t *testing.T) { + ct := NewCallTracker() + + // depth=1: root frame (EVM traces start at depth 1) + frameID, path := ct.ProcessDepthChange(1) + assert.Equal(t, uint32(0), frameID) + assert.Equal(t, []uint32{0}, path) + + // depth=2: entering first call + frameID, path = ct.ProcessDepthChange(2) + assert.Equal(t, uint32(1), frameID) + assert.Equal(t, []uint32{0, 1}, path) + + // depth=2: still in first call + frameID, path = ct.ProcessDepthChange(2) + assert.Equal(t, uint32(1), frameID) + assert.Equal(t, []uint32{0, 1}, path) + + // depth=1: returned from call + frameID, path = ct.ProcessDepthChange(1) + assert.Equal(t, uint32(0), frameID) + assert.Equal(t, []uint32{0}, path) +} + +func TestCallTracker_NestedCalls(t *testing.T) { + 
ct := NewCallTracker() + + // depth=1: root (EVM traces start at depth 1) + frameID, path := ct.ProcessDepthChange(1) + assert.Equal(t, uint32(0), frameID) + assert.Equal(t, []uint32{0}, path) + + // depth=2: first call + frameID, path = ct.ProcessDepthChange(2) + assert.Equal(t, uint32(1), frameID) + assert.Equal(t, []uint32{0, 1}, path) + + // depth=3: nested call + frameID, path = ct.ProcessDepthChange(3) + assert.Equal(t, uint32(2), frameID) + assert.Equal(t, []uint32{0, 1, 2}, path) + + // depth=4: deeper nested call + frameID, path = ct.ProcessDepthChange(4) + assert.Equal(t, uint32(3), frameID) + assert.Equal(t, []uint32{0, 1, 2, 3}, path) + + // depth=3: return from depth 4 + frameID, path = ct.ProcessDepthChange(3) + assert.Equal(t, uint32(2), frameID) + assert.Equal(t, []uint32{0, 1, 2}, path) + + // depth=2: return from depth 3 + frameID, path = ct.ProcessDepthChange(2) + assert.Equal(t, uint32(1), frameID) + assert.Equal(t, []uint32{0, 1}, path) + + // depth=1: return to root + frameID, path = ct.ProcessDepthChange(1) + assert.Equal(t, uint32(0), frameID) + assert.Equal(t, []uint32{0}, path) +} + +func TestCallTracker_SiblingCalls(t *testing.T) { + // Tests the scenario from the plan: + // root -> CALL (0x123) -> CALL (0x456) -> CALL (0x789) + // root -> CALL (0xabc) -> CALL (0x456) -> CALL (0x789) + ct := NewCallTracker() + + // depth=1: root (EVM traces start at depth 1) + frameID, path := ct.ProcessDepthChange(1) + assert.Equal(t, uint32(0), frameID) + assert.Equal(t, []uint32{0}, path) + + // First branch: depth=2 (call to 0x123) + frameID, path = ct.ProcessDepthChange(2) + assert.Equal(t, uint32(1), frameID) + assert.Equal(t, []uint32{0, 1}, path) + + // depth=3 (call to 0x456) + frameID, path = ct.ProcessDepthChange(3) + assert.Equal(t, uint32(2), frameID) + assert.Equal(t, []uint32{0, 1, 2}, path) + + // depth=4 (call to 0x789) + frameID, path = ct.ProcessDepthChange(4) + assert.Equal(t, uint32(3), frameID) + assert.Equal(t, []uint32{0, 1, 2, 3}, path) + + // Return all the way to root + frameID, path = ct.ProcessDepthChange(1) + assert.Equal(t, uint32(0), frameID) + assert.Equal(t, []uint32{0}, path) + + // Second branch: depth=2 (call to 0xabc) - NEW frame_id! + frameID, path = ct.ProcessDepthChange(2) + assert.Equal(t, uint32(4), frameID, "sibling call should get new frame_id") + assert.Equal(t, []uint32{0, 4}, path) + + // depth=3 (call to 0x456 again) - NEW frame_id! + frameID, path = ct.ProcessDepthChange(3) + assert.Equal(t, uint32(5), frameID, "same contract different call should get new frame_id") + assert.Equal(t, []uint32{0, 4, 5}, path) + + // depth=4 (call to 0x789 again) - NEW frame_id! 
+ frameID, path = ct.ProcessDepthChange(4) + assert.Equal(t, uint32(6), frameID, "same contract different call should get new frame_id") + assert.Equal(t, []uint32{0, 4, 5, 6}, path) +} + +func TestCallTracker_MultipleReturns(t *testing.T) { + // Test returning multiple levels at once (e.g., REVERT that unwinds multiple frames) + ct := NewCallTracker() + + // Build up: depth 1 -> 2 -> 3 -> 4 (EVM traces start at depth 1) + ct.ProcessDepthChange(1) + ct.ProcessDepthChange(2) + ct.ProcessDepthChange(3) + frameID, path := ct.ProcessDepthChange(4) + assert.Equal(t, uint32(3), frameID) + assert.Equal(t, []uint32{0, 1, 2, 3}, path) + + // Jump directly from depth 4 to depth 2 (skipping depth 3) + frameID, path = ct.ProcessDepthChange(2) + assert.Equal(t, uint32(1), frameID) + assert.Equal(t, []uint32{0, 1}, path) +} + +func TestCallTracker_PathIsCopy(t *testing.T) { + ct := NewCallTracker() + + ct.ProcessDepthChange(1) + _, path1 := ct.ProcessDepthChange(2) + + // Modify path1, should not affect tracker's internal state + path1[0] = 999 + + _, path2 := ct.ProcessDepthChange(2) + require.Len(t, path2, 2) + assert.Equal(t, uint32(0), path2[0], "modifying returned path should not affect tracker") +} + +func TestCallTracker_DepthStartsAtOne(t *testing.T) { + // EVM traces always start at depth 1, which is the root frame (ID 0) + ct := NewCallTracker() + + // First opcode at depth 1 - should be frame 0 (root) + frameID, path := ct.ProcessDepthChange(1) + assert.Equal(t, uint32(0), frameID) + assert.Equal(t, []uint32{0}, path) + + // Stay at depth 1 + frameID, path = ct.ProcessDepthChange(1) + assert.Equal(t, uint32(0), frameID) + assert.Equal(t, []uint32{0}, path) + + // Go deeper - creates frame 1 + frameID, path = ct.ProcessDepthChange(2) + assert.Equal(t, uint32(1), frameID) + assert.Equal(t, []uint32{0, 1}, path) +} + +func TestCallTracker_IssueFrameID(t *testing.T) { + // Tests IssueFrameID for synthetic EOA frames + ct := NewCallTracker() + + // depth=1: root (EVM traces start at depth 1) + frameID, path := ct.ProcessDepthChange(1) + assert.Equal(t, uint32(0), frameID) + assert.Equal(t, []uint32{0}, path) + + // Issue a synthetic frame for EOA call (no depth increase) + eoaFrameID, eoaPath := ct.IssueFrameID() + assert.Equal(t, uint32(1), eoaFrameID, "EOA frame should get next sequential ID") + assert.Equal(t, []uint32{0, 1}, eoaPath, "EOA path should be parent path + EOA ID") + + // Regular depth increase should get the next ID after the EOA frame + frameID, path = ct.ProcessDepthChange(2) + assert.Equal(t, uint32(2), frameID, "next real frame should get ID after EOA frame") + assert.Equal(t, []uint32{0, 2}, path) + + // Issue another EOA frame from depth 2 + eoaFrameID2, eoaPath2 := ct.IssueFrameID() + assert.Equal(t, uint32(3), eoaFrameID2) + assert.Equal(t, []uint32{0, 2, 3}, eoaPath2) + + // Return to depth 1 + frameID, path = ct.ProcessDepthChange(1) + assert.Equal(t, uint32(0), frameID) + assert.Equal(t, []uint32{0}, path) + + // Issue EOA from root - should continue sequential numbering + eoaFrameID3, eoaPath3 := ct.IssueFrameID() + assert.Equal(t, uint32(4), eoaFrameID3) + assert.Equal(t, []uint32{0, 4}, eoaPath3) +} + +func TestCallTracker_IssueFrameID_MultipleConsecutive(t *testing.T) { + // Test multiple consecutive EOA calls (e.g., contract sends to multiple EOAs) + ct := NewCallTracker() + + ct.ProcessDepthChange(1) // root + + // Three consecutive EOA calls + id1, path1 := ct.IssueFrameID() + id2, path2 := ct.IssueFrameID() + id3, path3 := ct.IssueFrameID() + + assert.Equal(t, 
uint32(1), id1) + assert.Equal(t, uint32(2), id2) + assert.Equal(t, uint32(3), id3) + + assert.Equal(t, []uint32{0, 1}, path1) + assert.Equal(t, []uint32{0, 2}, path2) + assert.Equal(t, []uint32{0, 3}, path3) + + // Next real call should continue from 4 + frameID, path := ct.ProcessDepthChange(2) + assert.Equal(t, uint32(4), frameID) + assert.Equal(t, []uint32{0, 4}, path) +} + +func TestIsPrecompile(t *testing.T) { + tests := []struct { + name string + addr string + expected bool + }{ + // Known precompiles (should return true) + {"ecrecover", "0x0000000000000000000000000000000000000001", true}, + {"sha256", "0x0000000000000000000000000000000000000002", true}, + {"ripemd160", "0x0000000000000000000000000000000000000003", true}, + {"identity", "0x0000000000000000000000000000000000000004", true}, + {"modexp", "0x0000000000000000000000000000000000000005", true}, + {"bn256Add", "0x0000000000000000000000000000000000000006", true}, + {"bn256ScalarMul", "0x0000000000000000000000000000000000000007", true}, + {"bn256Pairing", "0x0000000000000000000000000000000000000008", true}, + {"blake2f", "0x0000000000000000000000000000000000000009", true}, + {"kzgPointEvaluation", "0x000000000000000000000000000000000000000a", true}, + {"bls12381G1Add", "0x000000000000000000000000000000000000000b", true}, + {"bls12381G1Msm", "0x000000000000000000000000000000000000000c", true}, + {"bls12381G2Add", "0x000000000000000000000000000000000000000d", true}, + {"bls12381G2Msm", "0x000000000000000000000000000000000000000e", true}, + {"bls12381PairingCheck", "0x000000000000000000000000000000000000000f", true}, + {"bls12381MapFpToG1", "0x0000000000000000000000000000000000000010", true}, + {"bls12381MapFp2ToG2", "0x0000000000000000000000000000000000000011", true}, + {"p256Verify", "0x0000000000000000000000000000000000000100", true}, + + // Low addresses that are NOT precompiles (should return false) + // These are real EOAs/contracts deployed early in Ethereum's history + {"zero address", "0x0000000000000000000000000000000000000000", false}, + {"address 0x5c", "0x000000000000000000000000000000000000005c", false}, + {"address 0x60", "0x0000000000000000000000000000000000000060", false}, + {"address 0x44", "0x0000000000000000000000000000000000000044", false}, + {"address 0x348", "0x0000000000000000000000000000000000000348", false}, + {"address 0xffff", "0x000000000000000000000000000000000000ffff", false}, + {"address 0x12", "0x0000000000000000000000000000000000000012", false}, // Just above 0x11 + + // Real contract/EOA addresses (should return false) + {"WETH", "0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2", false}, + {"Uniswap V2 Router", "0x7a250d5630b4cf539739df2c5dacb4c659f2488d", false}, + {"vitalik.eth", "0xd8da6bf26964af9d7eed9e03e53415d37aa96045", false}, + {"random EOA", "0x1234567890abcdef1234567890abcdef12345678", false}, + + // Case insensitivity for precompiles + {"uppercase hex 0xA", "0x000000000000000000000000000000000000000A", true}, + {"mixed case 0xB", "0x000000000000000000000000000000000000000B", true}, + {"uppercase contract", "0xC02AAA39B223FE8D0A0E5C4F27EAD9083C756CC2", false}, + + // Without 0x prefix (edge case) + {"no prefix precompile", "0000000000000000000000000000000000000001", true}, + {"no prefix contract", "c02aaa39b223fe8d0a0e5c4f27ead9083c756cc2", false}, + + // Short addresses (should be padded) + {"short precompile 0x1", "0x1", true}, + {"short precompile 0x9", "0x9", true}, + {"short precompile 0x100", "0x100", true}, + {"short non-precompile 0x5c", "0x5c", false}, + } + + for _, tc := range 
tests { + t.Run(tc.name, func(t *testing.T) { + result := isPrecompile(tc.addr) + assert.Equal(t, tc.expected, result, "isPrecompile(%q) = %v, want %v", tc.addr, result, tc.expected) + }) + } +} + +// TestIsPrecompile_HardcodedList verifies that our hardcoded precompile list +// matches the expected Osaka precompiles from go-ethereum. +// Source: https://github.com/ethereum/go-ethereum/blob/master/core/vm/contracts.go +func TestIsPrecompile_HardcodedList(t *testing.T) { + // All expected Osaka precompiles (superset of all forks) + expectedPrecompiles := []string{ + "0x0000000000000000000000000000000000000001", // ecrecover + "0x0000000000000000000000000000000000000002", // sha256 + "0x0000000000000000000000000000000000000003", // ripemd160 + "0x0000000000000000000000000000000000000004", // identity + "0x0000000000000000000000000000000000000005", // modexp + "0x0000000000000000000000000000000000000006", // bn256Add + "0x0000000000000000000000000000000000000007", // bn256ScalarMul + "0x0000000000000000000000000000000000000008", // bn256Pairing + "0x0000000000000000000000000000000000000009", // blake2f + "0x000000000000000000000000000000000000000a", // kzgPointEvaluation + "0x000000000000000000000000000000000000000b", // bls12381G1Add + "0x000000000000000000000000000000000000000c", // bls12381G1MultiExp + "0x000000000000000000000000000000000000000d", // bls12381G2Add + "0x000000000000000000000000000000000000000e", // bls12381G2MultiExp + "0x000000000000000000000000000000000000000f", // bls12381Pairing + "0x0000000000000000000000000000000000000010", // bls12381MapG1 + "0x0000000000000000000000000000000000000011", // bls12381MapG2 + "0x0000000000000000000000000000000000000100", // p256Verify + } + + // Verify all expected precompiles are detected + for _, addr := range expectedPrecompiles { + assert.True(t, isPrecompile(addr), + "precompile %s should be detected", addr) + } + + // Verify the expected count: 0x01-0x11 (17) + 0x100 (1) = 18 precompiles + assert.Equal(t, 18, len(expectedPrecompiles), + "expected 18 precompiles in Osaka fork") + assert.Equal(t, 18, len(precompileAddresses), + "hardcoded precompileAddresses should have 18 entries") +} + +func TestIsCallOpcode(t *testing.T) { + tests := []struct { + opcode string + expected bool + }{ + // CALL-type opcodes (should return true) + {"CALL", true}, + {"CALLCODE", true}, + {"DELEGATECALL", true}, + {"STATICCALL", true}, + + // CREATE opcodes (should return false - they always increase depth) + {"CREATE", false}, + {"CREATE2", false}, + + // Other opcodes (should return false) + {"ADD", false}, + {"SUB", false}, + {"SLOAD", false}, + {"SSTORE", false}, + {"PUSH1", false}, + {"POP", false}, + {"JUMP", false}, + {"RETURN", false}, + {"REVERT", false}, + {"STOP", false}, + {"", false}, + } + + for _, tc := range tests { + t.Run(tc.opcode, func(t *testing.T) { + result := isCallOpcode(tc.opcode) + assert.Equal(t, tc.expected, result, "isCallOpcode(%q) = %v, want %v", tc.opcode, result, tc.expected) + }) + } +} + +// TestEOADetectionLogic tests the EOA call detection scenarios. +// This validates the fix for the bug where synthetic frames were incorrectly +// created for failed calls (depth decrease) instead of only for EOA calls (depth same). 
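+//
+// In short, for a CALL-type opcode at depth d followed by an opcode at depth d':
+//
+//	d' > d         -> callee code is executing (real frame; no synthetic frame)
+//	d' < d         -> the call returned or reverted (no synthetic frame)
+//	d' == d        -> the target ran no code; create a synthetic frame unless it is a precompile
+//	no next opcode -> indeterminate (e.g. out of gas); no synthetic frame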
+func TestEOADetectionLogic(t *testing.T) { + // Helper to simulate the EOA detection logic from transaction_processing.go + shouldCreateSyntheticFrame := func(currentDepth, nextDepth uint64, hasNextOpcode bool, callToAddr string) bool { + if !hasNextOpcode { + // Last opcode is a CALL - we can't determine if it's EOA + // because we don't have a next opcode to compare depth with. + return false + } + + // Only create synthetic frame if depth stayed the same (EOA call) + // Depth increase = entered contract code (not EOA) + // Depth decrease = call returned/failed (not EOA) + // Depth same = called EOA or precompile (immediate return) + if nextDepth == currentDepth && !isPrecompile(callToAddr) { + return true + } + + return false + } + + tests := []struct { + name string + currentDepth uint64 + nextDepth uint64 + hasNextOp bool + callToAddr string + expectSynth bool + description string + }{ + // EOA call scenarios (should create synthetic frame) + { + name: "EOA call - depth stays same", + currentDepth: 2, + nextDepth: 2, + hasNextOp: true, + callToAddr: "0xd8da6bf26964af9d7eed9e03e53415d37aa96045", // vitalik.eth + expectSynth: true, + description: "CALL to EOA returns immediately, depth stays same", + }, + { + name: "EOA call from root depth", + currentDepth: 1, + nextDepth: 1, + hasNextOp: true, + callToAddr: "0x1234567890abcdef1234567890abcdef12345678", + expectSynth: true, + description: "CALL to EOA from root frame", + }, + + // Precompile call scenarios (should NOT create synthetic frame) + { + name: "precompile call - ecrecover", + currentDepth: 2, + nextDepth: 2, + hasNextOp: true, + callToAddr: "0x0000000000000000000000000000000000000001", + expectSynth: false, + description: "CALL to ecrecover precompile", + }, + { + name: "precompile call - sha256", + currentDepth: 2, + nextDepth: 2, + hasNextOp: true, + callToAddr: "0x0000000000000000000000000000000000000002", + expectSynth: false, + description: "CALL to sha256 precompile", + }, + { + name: "precompile call - kzg point eval", + currentDepth: 3, + nextDepth: 3, + hasNextOp: true, + callToAddr: "0x000000000000000000000000000000000000000a", + expectSynth: false, + description: "STATICCALL to KZG point evaluation precompile", + }, + + // Contract call scenarios (should NOT create synthetic frame) + { + name: "contract call - depth increases", + currentDepth: 2, + nextDepth: 3, + hasNextOp: true, + callToAddr: "0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2", // WETH + expectSynth: false, + description: "CALL to contract enters code, depth increases", + }, + { + name: "delegatecall - depth increases", + currentDepth: 1, + nextDepth: 2, + hasNextOp: true, + callToAddr: "0x7a250d5630b4cf539739df2c5dacb4c659f2488d", // Uniswap Router + expectSynth: false, + description: "DELEGATECALL enters implementation code", + }, + { + name: "nested contract call", + currentDepth: 3, + nextDepth: 4, + hasNextOp: true, + callToAddr: "0xabcdef1234567890abcdef1234567890abcdef12", + expectSynth: false, + description: "Nested CALL enters deeper contract", + }, + + // Failed/returning call scenarios (should NOT create synthetic frame) + // This is the bug we fixed - depth DECREASE was incorrectly treated as EOA + { + name: "failed call - depth decreases by 1", + currentDepth: 3, + nextDepth: 2, + hasNextOp: true, + callToAddr: "0xde9c774cde34f85ee69c22e9a1077a0c9091f09b", + expectSynth: false, + description: "CALL failed/reverted, returned to caller depth", + }, + { + name: "failed call - depth decreases by 2", + currentDepth: 4, + nextDepth: 2, + 
hasNextOp: true, + callToAddr: "0xabcdef1234567890abcdef1234567890abcdef12", + expectSynth: false, + description: "CALL caused revert unwinding multiple frames", + }, + { + name: "out of gas - depth returns to root", + currentDepth: 3, + nextDepth: 1, + hasNextOp: true, + callToAddr: "0xfe02a32cbe0cb9ad9a945576a5bb53a3c123a3a3", + expectSynth: false, + description: "Out of gas unwinds all the way to root", + }, + { + name: "call returns normally", + currentDepth: 2, + nextDepth: 1, + hasNextOp: true, + callToAddr: "0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2", + expectSynth: false, + description: "Contract call completed and returned", + }, + + // Last opcode scenarios (should NOT create synthetic frame) + { + name: "last opcode is CALL - no next opcode", + currentDepth: 2, + nextDepth: 0, // doesn't matter + hasNextOp: false, + callToAddr: "0xd8da6bf26964af9d7eed9e03e53415d37aa96045", + expectSynth: false, + description: "Transaction ends with CALL (likely failed)", + }, + { + name: "last opcode CALL to contract", + currentDepth: 1, + nextDepth: 0, + hasNextOp: false, + callToAddr: "0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2", + expectSynth: false, + description: "Can't determine if EOA without next opcode", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + result := shouldCreateSyntheticFrame(tc.currentDepth, tc.nextDepth, tc.hasNextOp, tc.callToAddr) + assert.Equal(t, tc.expectSynth, result, + "%s: shouldCreateSyntheticFrame(depth=%d, nextDepth=%d, hasNext=%v, addr=%s) = %v, want %v", + tc.description, tc.currentDepth, tc.nextDepth, tc.hasNextOp, tc.callToAddr, result, tc.expectSynth) + }) + } +} + +// TestEOADetectionBugScenario_DepthDecrease verifies the fix for the bug where +// a CALL followed by depth decrease was incorrectly treated as an EOA call. +// Real-world example: transaction 0x4f7494... had a CALL at depth 3, next opcode +// was at depth 2 (returned/failed). The old <= check created a phantom synthetic frame. +func TestEOADetectionBugScenario_DepthDecrease(t *testing.T) { + // Simulate the buggy scenario from tx 0x4f7494c9f3b1bb7fb9f4d928aae41d971f0799a3d5c24df209074b70f04211f5 + // Index 235: GAS at depth 3 + // Index 236: CALL at depth 3 (to 0xde9c774cde34f85ee69c22e9a1077a0c9091f09b) + // Index 237: RETURNDATASIZE at depth 2 (call returned/failed) + currentDepth := uint64(3) + nextDepth := uint64(2) // Depth DECREASED (call returned/failed) + callToAddr := "0xde9c774cde34f85ee69c22e9a1077a0c9091f09b" + + // Old buggy logic: nextDepth <= currentDepth → 2 <= 3 → TRUE (wrong!) + buggyLogic := nextDepth <= currentDepth && !isPrecompile(callToAddr) + assert.True(t, buggyLogic, "Old buggy logic would have created synthetic frame") + + // Fixed logic: nextDepth == currentDepth → 2 == 3 → FALSE (correct!) + fixedLogic := nextDepth == currentDepth && !isPrecompile(callToAddr) + assert.False(t, fixedLogic, "Fixed logic should NOT create synthetic frame") +} + +// TestEOADetectionBugScenario_OutOfGas verifies the fix for the bug where +// a CALL as the last opcode (out of gas) was incorrectly treated as an EOA call. +// Real-world example: transaction 0x7178d8e3... ended with a CALL that ran out of gas. 
+func TestEOADetectionBugScenario_OutOfGas(t *testing.T) { + // Simulate the buggy scenario from tx 0x7178d8e3a33331ee0b2c42372c357cb6135bf3acd6e1eea5dbca7d9dbedfa418 + // Index 10: GAS at depth 1 + // Index 11: CALL at depth 1 (last opcode - out of gas before entering target) + // No index 12 (trace ended) + callToAddr := "0xfe02a32cbe0cb9ad9a945576a5bb53a3c123a3a3" + hasNextOpcode := false + + // Old buggy logic: "Last opcode is a CALL - if not precompile, must be EOA" + buggyLogic := !hasNextOpcode && !isPrecompile(callToAddr) + assert.True(t, buggyLogic, "Old buggy logic would have created synthetic frame") + + // Fixed logic: Don't assume last CALL is EOA - we can't determine without next opcode + fixedLogic := hasNextOpcode && !isPrecompile(callToAddr) // Always false when !hasNextOpcode + assert.False(t, fixedLogic, "Fixed logic should NOT create synthetic frame for last opcode") +} + +func TestCallTracker_RealWorldExample(t *testing.T) { + // Simulate a real EVM trace where depth starts at 1: + // op=PUSH1, depth=1 → frame_id=0, path=[0] (root execution) + // op=CALL(A),depth=1 → frame_id=0, path=[0] + // op=ADD, depth=2 → frame_id=1, path=[0,1] (inside A) + // op=CALL(B),d=2 → frame_id=1, path=[0,1] + // op=MUL, d=3 → frame_id=2, path=[0,1,2] (inside B) + // op=CALL(C),d=3 → frame_id=2, path=[0,1,2] + // op=SLOAD,d=4 → frame_id=3, path=[0,1,2,3] (inside C) + // op=RETURN,d=4 → frame_id=3, path=[0,1,2,3] + // op=ADD, d=3 → frame_id=2, path=[0,1,2] (back in B) + // op=RETURN,d=3 → frame_id=2, path=[0,1,2] + // op=POP, depth=2 → frame_id=1, path=[0,1] (back in A) + // op=STOP, depth=1 → frame_id=0, path=[0] (back in root) + ct := NewCallTracker() + + type expected struct { + depth uint64 + frameID uint32 + path []uint32 + } + + testCases := []expected{ + {1, 0, []uint32{0}}, // PUSH1 (root) + {1, 0, []uint32{0}}, // CALL(A) + {2, 1, []uint32{0, 1}}, // ADD (inside A) + {2, 1, []uint32{0, 1}}, // CALL(B) + {3, 2, []uint32{0, 1, 2}}, // MUL (inside B) + {3, 2, []uint32{0, 1, 2}}, // CALL(C) + {4, 3, []uint32{0, 1, 2, 3}}, // SLOAD (inside C) + {4, 3, []uint32{0, 1, 2, 3}}, // RETURN (inside C) + {3, 2, []uint32{0, 1, 2}}, // ADD (back in B) + {3, 2, []uint32{0, 1, 2}}, // RETURN (inside B) + {2, 1, []uint32{0, 1}}, // POP (back in A) + {1, 0, []uint32{0}}, // STOP (back in root) + } + + for i, tc := range testCases { + frameID, path := ct.ProcessDepthChange(tc.depth) + assert.Equal(t, tc.frameID, frameID, "case %d: frame_id mismatch", i) + assert.Equal(t, tc.path, path, "case %d: path mismatch", i) + } +} diff --git a/pkg/processor/transaction/structlog/columns.go b/pkg/processor/transaction/structlog/columns.go index a08f2af..bd91f7c 100644 --- a/pkg/processor/transaction/structlog/columns.go +++ b/pkg/processor/transaction/structlog/columns.go @@ -6,6 +6,19 @@ import ( "github.com/ClickHouse/ch-go/proto" ) +// ClickHouseTime wraps time.Time for ClickHouse DateTime formatting. +type ClickHouseTime time.Time + +// NewClickHouseTime creates a new ClickHouseTime from time.Time. +func NewClickHouseTime(t time.Time) ClickHouseTime { + return ClickHouseTime(t) +} + +// Time returns the underlying time.Time. +func (t ClickHouseTime) Time() time.Time { + return time.Time(t) +} + // Columns holds all columns for structlog batch insert using ch-go columnar protocol. 
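+//
+// CallFrameID and CallFramePath come from the CallTracker: for an opcode executing
+// inside call frame 2, which was entered from frame 1, which was entered from the
+// root frame, the row carries call_frame_id=2 and call_frame_path=[0,1,2]
+// (illustrative values).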
type Columns struct { UpdatedDateTime proto.ColDateTime @@ -16,16 +29,18 @@ type Columns struct { TransactionFailed proto.ColBool TransactionReturnValue *proto.ColNullable[string] Index proto.ColUInt32 - ProgramCounter proto.ColUInt32 Operation proto.ColStr Gas proto.ColUInt64 GasCost proto.ColUInt64 GasUsed proto.ColUInt64 + GasSelf proto.ColUInt64 Depth proto.ColUInt64 ReturnData *proto.ColNullable[string] Refund *proto.ColNullable[uint64] Error *proto.ColNullable[string] CallToAddress *proto.ColNullable[string] + CallFrameID proto.ColUInt32 + CallFramePath *proto.ColArr[uint32] MetaNetworkName proto.ColStr } @@ -37,6 +52,7 @@ func NewColumns() *Columns { Refund: new(proto.ColUInt64).Nullable(), Error: new(proto.ColStr).Nullable(), CallToAddress: new(proto.ColStr).Nullable(), + CallFramePath: new(proto.ColUInt32).Array(), } } @@ -50,16 +66,18 @@ func (c *Columns) Append( txFailed bool, txReturnValue *string, index uint32, - pc uint32, op string, gas uint64, gasCost uint64, gasUsed uint64, + gasSelf uint64, depth uint64, returnData *string, refund *uint64, errStr *string, callTo *string, + callFrameID uint32, + callFramePath []uint32, network string, ) { c.UpdatedDateTime.Append(updatedDateTime) @@ -70,16 +88,18 @@ func (c *Columns) Append( c.TransactionFailed.Append(txFailed) c.TransactionReturnValue.Append(nullableStr(txReturnValue)) c.Index.Append(index) - c.ProgramCounter.Append(pc) c.Operation.Append(op) c.Gas.Append(gas) c.GasCost.Append(gasCost) c.GasUsed.Append(gasUsed) + c.GasSelf.Append(gasSelf) c.Depth.Append(depth) c.ReturnData.Append(nullableStr(returnData)) c.Refund.Append(nullableUint64(refund)) c.Error.Append(nullableStr(errStr)) c.CallToAddress.Append(nullableStr(callTo)) + c.CallFrameID.Append(callFrameID) + c.CallFramePath.Append(callFramePath) c.MetaNetworkName.Append(network) } @@ -93,16 +113,18 @@ func (c *Columns) Reset() { c.TransactionFailed.Reset() c.TransactionReturnValue.Reset() c.Index.Reset() - c.ProgramCounter.Reset() c.Operation.Reset() c.Gas.Reset() c.GasCost.Reset() c.GasUsed.Reset() + c.GasSelf.Reset() c.Depth.Reset() c.ReturnData.Reset() c.Refund.Reset() c.Error.Reset() c.CallToAddress.Reset() + c.CallFrameID.Reset() + c.CallFramePath.Reset() c.MetaNetworkName.Reset() } @@ -117,16 +139,18 @@ func (c *Columns) Input() proto.Input { {Name: "transaction_failed", Data: &c.TransactionFailed}, {Name: "transaction_return_value", Data: c.TransactionReturnValue}, {Name: "index", Data: &c.Index}, - {Name: "program_counter", Data: &c.ProgramCounter}, {Name: "operation", Data: &c.Operation}, {Name: "gas", Data: &c.Gas}, {Name: "gas_cost", Data: &c.GasCost}, {Name: "gas_used", Data: &c.GasUsed}, + {Name: "gas_self", Data: &c.GasSelf}, {Name: "depth", Data: &c.Depth}, {Name: "return_data", Data: c.ReturnData}, {Name: "refund", Data: c.Refund}, {Name: "error", Data: c.Error}, {Name: "call_to_address", Data: c.CallToAddress}, + {Name: "call_frame_id", Data: &c.CallFrameID}, + {Name: "call_frame_path", Data: c.CallFramePath}, {Name: "meta_network_name", Data: &c.MetaNetworkName}, } } diff --git a/pkg/processor/transaction/structlog/config.go b/pkg/processor/transaction/structlog/config.go index ed041b4..e948c1d 100644 --- a/pkg/processor/transaction/structlog/config.go +++ b/pkg/processor/transaction/structlog/config.go @@ -2,19 +2,26 @@ package structlog import ( "fmt" + "time" "github.com/ethpandaops/execution-processor/pkg/clickhouse" ) +// Default buffer configuration values. 
+const ( + DefaultBufferMaxRows = 100000 + DefaultBufferFlushInterval = time.Second +) + // Config holds configuration for transaction structlog processor. type Config struct { clickhouse.Config `yaml:",inline"` Enabled bool `yaml:"enabled"` Table string `yaml:"table"` - // Streaming settings - ChunkSize int `yaml:"chunkSize"` // Default: 10,000 rows per OnInput iteration - ProgressLogThreshold int `yaml:"progressLogThreshold"` // Default: 100,000 - log progress for large txs + // Row buffer settings for batched ClickHouse inserts + BufferMaxRows int `yaml:"bufferMaxRows"` // Max rows before flush. Default: 100000 + BufferFlushInterval time.Duration `yaml:"bufferFlushInterval"` // Max time before flush. Default: 1s // Block completion tracking MaxPendingBlockRange int `yaml:"maxPendingBlockRange"` // Max distance between oldest incomplete and current block. Default: 2 diff --git a/pkg/processor/transaction/structlog/create_address_test.go b/pkg/processor/transaction/structlog/create_address_test.go new file mode 100644 index 0000000..b77f466 --- /dev/null +++ b/pkg/processor/transaction/structlog/create_address_test.go @@ -0,0 +1,262 @@ +package structlog + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/ethpandaops/execution-processor/pkg/ethereum/execution" +) + +const testCreateAddress = "0x1234567890abcdef1234567890abcdef12345678" + +func TestComputeCreateAddresses_Empty(t *testing.T) { + result := ComputeCreateAddresses([]execution.StructLog{}) + assert.Empty(t, result) +} + +func TestComputeCreateAddresses_NoCREATE(t *testing.T) { + structlogs := []execution.StructLog{ + {Op: "PUSH1", Depth: 1}, + {Op: "CALL", Depth: 1}, + {Op: "ADD", Depth: 2}, + {Op: "RETURN", Depth: 2}, + {Op: "STOP", Depth: 1}, + } + + result := ComputeCreateAddresses(structlogs) + assert.Empty(t, result) +} + +func TestComputeCreateAddresses_SingleCREATE(t *testing.T) { + // Simulate: CREATE at depth 2, constructor runs at depth 3, returns + createdAddr := "0xdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef" + stack := []string{createdAddr} + + structlogs := []execution.StructLog{ + {Op: "PUSH1", Depth: 2}, + {Op: "CREATE", Depth: 2}, // index 1 + {Op: "PUSH1", Depth: 3}, // constructor starts + {Op: "RETURN", Depth: 3}, // constructor ends + {Op: "SWAP1", Depth: 2, Stack: &stack}, // back in caller, stack has address + } + + result := ComputeCreateAddresses(structlogs) + + require.Contains(t, result, 1) + // Address is already 40 chars, so stays the same + assert.Equal(t, createdAddr, *result[1]) +} + +func TestComputeCreateAddresses_CREATE2(t *testing.T) { + createdAddr := "0xabcdefabcdefabcdefabcdefabcdefabcdefabcd" + stack := []string{createdAddr} + + structlogs := []execution.StructLog{ + {Op: "PUSH1", Depth: 1}, + {Op: "CREATE2", Depth: 1}, // index 1 + {Op: "ADD", Depth: 2}, // constructor + {Op: "RETURN", Depth: 2}, // constructor ends + {Op: "POP", Depth: 1, Stack: &stack}, // back in caller + } + + result := ComputeCreateAddresses(structlogs) + + require.Contains(t, result, 1) + assert.Equal(t, createdAddr, *result[1]) +} + +func TestComputeCreateAddresses_FailedCREATE(t *testing.T) { + // When CREATE fails immediately, next opcode is at same depth with 0 on stack + zeroAddr := "0x0" + stack := []string{zeroAddr} + + structlogs := []execution.StructLog{ + {Op: "PUSH1", Depth: 2}, + {Op: "CREATE", Depth: 2}, // index 1 - fails immediately + {Op: "ISZERO", Depth: 2, Stack: &stack}, // still at depth 2, stack has 0 + } + + result := 
ComputeCreateAddresses(structlogs) + + require.Contains(t, result, 1) + // Zero address is zero-padded to 40 hex chars + assert.Equal(t, "0x0000000000000000000000000000000000000000", *result[1]) +} + +func TestComputeCreateAddresses_NestedCREATEs(t *testing.T) { + // Outer CREATE at depth 1, inner CREATE at depth 2 + innerAddr := "0x1111111111111111111111111111111111111111" + outerAddr := "0x2222222222222222222222222222222222222222" + innerStack := []string{innerAddr} + outerStack := []string{outerAddr} + + structlogs := []execution.StructLog{ + {Op: "PUSH1", Depth: 1}, + {Op: "CREATE", Depth: 1}, // index 1 - outer CREATE + {Op: "PUSH1", Depth: 2}, // outer constructor starts + {Op: "CREATE", Depth: 2}, // index 3 - inner CREATE + {Op: "ADD", Depth: 3}, // inner constructor + {Op: "RETURN", Depth: 3}, // inner constructor ends + {Op: "POP", Depth: 2, Stack: &innerStack}, // back in outer constructor + {Op: "RETURN", Depth: 2}, // outer constructor ends + {Op: "SWAP1", Depth: 1, Stack: &outerStack}, // back in original caller + } + + result := ComputeCreateAddresses(structlogs) + + require.Contains(t, result, 1) + require.Contains(t, result, 3) + assert.Equal(t, outerAddr, *result[1]) + assert.Equal(t, innerAddr, *result[3]) +} + +func TestComputeCreateAddresses_MultipleCREATEsSameDepth(t *testing.T) { + // Two CREATEs at the same depth (sequential, not nested) + addr1 := "0x1111111111111111111111111111111111111111" + addr2 := "0x2222222222222222222222222222222222222222" + stack1 := []string{addr1} + stack2 := []string{addr2} + + structlogs := []execution.StructLog{ + {Op: "PUSH1", Depth: 1}, + {Op: "CREATE", Depth: 1}, // index 1 - first CREATE + {Op: "ADD", Depth: 2}, // first constructor + {Op: "RETURN", Depth: 2}, // first constructor ends + {Op: "POP", Depth: 1, Stack: &stack1}, // back, has first address + {Op: "PUSH1", Depth: 1}, + {Op: "CREATE", Depth: 1}, // index 6 - second CREATE + {Op: "MUL", Depth: 2}, // second constructor + {Op: "RETURN", Depth: 2}, // second constructor ends + {Op: "SWAP1", Depth: 1, Stack: &stack2}, // back, has second address + } + + result := ComputeCreateAddresses(structlogs) + + require.Contains(t, result, 1) + require.Contains(t, result, 6) + assert.Equal(t, addr1, *result[1]) + assert.Equal(t, addr2, *result[6]) +} + +func TestExtractCallAddressWithCreate_CREATE(t *testing.T) { + p := &Processor{} + createAddresses := map[int]*string{ + 0: ptrString(testCreateAddress), + } + + result := p.extractCallAddressWithCreate(&execution.StructLog{ + Op: "CREATE", + }, 0, createAddresses) + + assert.NotNil(t, result) + assert.Equal(t, testCreateAddress, *result) +} + +func TestExtractCallAddressWithCreate_CREATE2(t *testing.T) { + p := &Processor{} + addr := "0xabcdef1234567890abcdef1234567890abcdef12" + createAddresses := map[int]*string{ + 5: ptrString(addr), + } + + result := p.extractCallAddressWithCreate(&execution.StructLog{ + Op: "CREATE2", + }, 5, createAddresses) + + assert.NotNil(t, result) + assert.Equal(t, addr, *result) +} + +func TestExtractCallAddressWithCreate_CREATEWithNilMap(t *testing.T) { + p := &Processor{} + + result := p.extractCallAddressWithCreate(&execution.StructLog{ + Op: "CREATE", + }, 0, nil) + + assert.Nil(t, result) +} + +func TestExtractCallAddressWithCreate_CREATENotInMap(t *testing.T) { + p := &Processor{} + createAddresses := map[int]*string{ + 10: ptrString(testCreateAddress), + } + + result := p.extractCallAddressWithCreate(&execution.StructLog{ + Op: "CREATE", + }, 5, createAddresses) // index 5 not in map + + 
assert.Nil(t, result) +} + +func TestExtractCallAddressWithCreate_CALLDelegatesToExtractCallAddress(t *testing.T) { + p := &Processor{} + createAddresses := map[int]*string{ + 0: ptrString(testCreateAddress), + } + stack := []string{"0x5208", "0xdeadbeef"} + + result := p.extractCallAddressWithCreate(&execution.StructLog{ + Op: "CALL", + Stack: &stack, + }, 0, createAddresses) + + // Should use extractCallAddress, not createAddresses + assert.NotNil(t, result) + // Second from top of stack, zero-padded to 40 hex chars + assert.Equal(t, "0x0000000000000000000000000000000000005208", *result) +} + +func TestExtractCallAddressWithCreate_DELEGATECALLDelegatesToExtractCallAddress(t *testing.T) { + p := &Processor{} + createAddresses := map[int]*string{ + 0: ptrString(testCreateAddress), + } + stack := []string{"0x5208", "0xdeadbeef"} + + result := p.extractCallAddressWithCreate(&execution.StructLog{ + Op: "DELEGATECALL", + Stack: &stack, + }, 0, createAddresses) + + assert.NotNil(t, result) + // Zero-padded to 40 hex chars + assert.Equal(t, "0x0000000000000000000000000000000000005208", *result) +} + +func TestExtractCallAddressWithCreate_NonCallOpcodeReturnsNil(t *testing.T) { + p := &Processor{} + createAddresses := map[int]*string{ + 0: ptrString(testCreateAddress), + } + stack := []string{"0x5208", "0xdeadbeef"} + + testCases := []string{ + "PUSH1", + "ADD", + "SLOAD", + "SSTORE", + "RETURN", + "REVERT", + "STOP", + } + + for _, op := range testCases { + t.Run(op, func(t *testing.T) { + result := p.extractCallAddressWithCreate(&execution.StructLog{ + Op: op, + Stack: &stack, + }, 0, createAddresses) + + assert.Nil(t, result, "opcode %s should return nil", op) + }) + } +} + +// ptrString returns a pointer to the given string. +func ptrString(s string) *string { + return &s +} diff --git a/pkg/processor/transaction/structlog/extract_call_address_test.go b/pkg/processor/transaction/structlog/extract_call_address_test.go new file mode 100644 index 0000000..d72cb7f --- /dev/null +++ b/pkg/processor/transaction/structlog/extract_call_address_test.go @@ -0,0 +1,270 @@ +package structlog + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/ethpandaops/execution-processor/pkg/ethereum/execution" +) + +func TestExtractCallAddress_NilStack(t *testing.T) { + p := &Processor{} + + result := p.extractCallAddress(&execution.StructLog{ + Op: "CALL", + Stack: nil, + }) + + assert.Nil(t, result) +} + +func TestExtractCallAddress_EmptyStack(t *testing.T) { + p := &Processor{} + emptyStack := []string{} + + result := p.extractCallAddress(&execution.StructLog{ + Op: "CALL", + Stack: &emptyStack, + }) + + assert.Nil(t, result) +} + +func TestExtractCallAddress_InsufficientStack(t *testing.T) { + p := &Processor{} + stack := []string{"0x1234"} // Only 1 element, need at least 2 + + result := p.extractCallAddress(&execution.StructLog{ + Op: "CALL", + Stack: &stack, + }) + + assert.Nil(t, result) +} + +func TestExtractCallAddress_CALL(t *testing.T) { + p := &Processor{} + // CALL stack (index 0 = bottom, len-1 = top): + // [retSize, retOffset, argsSize, argsOffset, value, addr, gas] + // Address is at index len-2 (second from top) + stack := []string{ + "0x0", // retSize (bottom, index 0) + "0x0", // retOffset + "0x0", // argsSize + "0x0", // argsOffset + "0x0", // value + "0x7a250d5630b4cf539739df2c5dacb4c659f2488d", // addr (index len-2) + "0x5208", // gas (top, index len-1) + } + + result := p.extractCallAddress(&execution.StructLog{ + Op: "CALL", + Stack: &stack, + }) + + 
assert.NotNil(t, result) + assert.Equal(t, "0x7a250d5630b4cf539739df2c5dacb4c659f2488d", *result) +} + +func TestExtractCallAddress_CALL_MinimalStack(t *testing.T) { + p := &Processor{} + // Minimal stack with just 2 elements (addr at index 0, gas at index 1) + stack := []string{ + "0x7a250d5630b4cf539739df2c5dacb4c659f2488d", // addr (index 0 = len-2) + "0x5208", // gas (index 1 = len-1) + } + + result := p.extractCallAddress(&execution.StructLog{ + Op: "CALL", + Stack: &stack, + }) + + assert.NotNil(t, result) + assert.Equal(t, "0x7a250d5630b4cf539739df2c5dacb4c659f2488d", *result) +} + +func TestExtractCallAddress_CALL_WithExtraStackItemsBelow(t *testing.T) { + p := &Processor{} + // Stack with extra items BELOW CALL args (at the bottom) + // The CALL args are still at the top, so len-2 still gives addr + stack := []string{ + "0xdeadbeef", // extra item (bottom) + "0xcafebabe", // another extra item + "0x0", // retSize (start of CALL args) + "0x0", // retOffset + "0x0", // argsSize + "0x0", // argsOffset + "0x0", // value + "0x7a250d5630b4cf539739df2c5dacb4c659f2488d", // addr (len-2) + "0x5208", // gas (top, len-1) + } + + result := p.extractCallAddress(&execution.StructLog{ + Op: "CALL", + Stack: &stack, + }) + + assert.NotNil(t, result) + assert.Equal(t, "0x7a250d5630b4cf539739df2c5dacb4c659f2488d", *result) +} + +func TestExtractCallAddress_CALLCODE(t *testing.T) { + p := &Processor{} + // CALLCODE has same stack layout as CALL + stack := []string{ + "0x7a250d5630b4cf539739df2c5dacb4c659f2488d", // addr + "0x5208", // gas + } + + result := p.extractCallAddress(&execution.StructLog{ + Op: "CALLCODE", + Stack: &stack, + }) + + assert.NotNil(t, result) + assert.Equal(t, "0x7a250d5630b4cf539739df2c5dacb4c659f2488d", *result) +} + +func TestExtractCallAddress_DELEGATECALL(t *testing.T) { + p := &Processor{} + // DELEGATECALL stack (no value parameter, but addr still at len-2): + // [retSize, retOffset, argsSize, argsOffset, addr, gas] + stack := []string{ + "0x7a250d5630b4cf539739df2c5dacb4c659f2488d", // addr + "0x5208", // gas + } + + result := p.extractCallAddress(&execution.StructLog{ + Op: "DELEGATECALL", + Stack: &stack, + }) + + assert.NotNil(t, result) + assert.Equal(t, "0x7a250d5630b4cf539739df2c5dacb4c659f2488d", *result) +} + +func TestExtractCallAddress_STATICCALL(t *testing.T) { + p := &Processor{} + // STATICCALL has same stack layout as DELEGATECALL + stack := []string{ + "0x7a250d5630b4cf539739df2c5dacb4c659f2488d", // addr + "0x5208", // gas + } + + result := p.extractCallAddress(&execution.StructLog{ + Op: "STATICCALL", + Stack: &stack, + }) + + assert.NotNil(t, result) + assert.Equal(t, "0x7a250d5630b4cf539739df2c5dacb4c659f2488d", *result) +} + +func TestExtractCallAddress_NonCallOpcode(t *testing.T) { + p := &Processor{} + stack := []string{"0x1234", "0x5678"} + + testCases := []string{ + "PUSH1", + "ADD", + "SLOAD", + "SSTORE", + "JUMP", + "RETURN", + "REVERT", + "CREATE", // CREATE is not handled (address comes from trace) + "CREATE2", // CREATE2 is not handled (address comes from trace) + } + + for _, op := range testCases { + t.Run(op, func(t *testing.T) { + result := p.extractCallAddress(&execution.StructLog{ + Op: op, + Stack: &stack, + }) + assert.Nil(t, result, "opcode %s should not extract call address", op) + }) + } +} + +func TestExtractCallAddress_ShortAddressPadding(t *testing.T) { + p := &Processor{} + // Test that short addresses (like precompiles) get zero-padded + stack := []string{ + "0x1", // addr - precompile ecRecover, should be padded + 
"0x5208", // gas + } + + result := p.extractCallAddress(&execution.StructLog{ + Op: "CALL", + Stack: &stack, + }) + + assert.NotNil(t, result) + assert.Equal(t, "0x0000000000000000000000000000000000000001", *result) + assert.Len(t, *result, 42) +} + +func TestExtractCallAddress_Permit2Padding(t *testing.T) { + p := &Processor{} + // Test Permit2 address with leading zeros + stack := []string{ + "0x22d473030f116ddee9f6b43ac78ba3", // Permit2 truncated + "0x5208", // gas + } + + result := p.extractCallAddress(&execution.StructLog{ + Op: "CALL", + Stack: &stack, + }) + + assert.NotNil(t, result) + assert.Equal(t, "0x000000000022d473030f116ddee9f6b43ac78ba3", *result) + assert.Len(t, *result, 42) +} + +func TestExtractCallAddress_AllCallVariants(t *testing.T) { + // Table-driven test for all supported CALL variants + p := &Processor{} + + targetAddr := "0x7a250d5630b4cf539739df2c5dacb4c659f2488d" + + testCases := []struct { + name string + op string + stack []string // Stack with addr at len-2 and gas at len-1 + }{ + { + name: "CALL with full stack", + op: "CALL", + stack: []string{"0xretSize", "0xretOff", "0xargsSize", "0xargsOff", "0xvalue", targetAddr, "0xgas"}, + }, + { + name: "CALLCODE with full stack", + op: "CALLCODE", + stack: []string{"0xretSize", "0xretOff", "0xargsSize", "0xargsOff", "0xvalue", targetAddr, "0xgas"}, + }, + { + name: "DELEGATECALL with full stack", + op: "DELEGATECALL", + stack: []string{"0xretSize", "0xretOff", "0xargsSize", "0xargsOff", targetAddr, "0xgas"}, + }, + { + name: "STATICCALL with full stack", + op: "STATICCALL", + stack: []string{"0xretSize", "0xretOff", "0xargsSize", "0xargsOff", targetAddr, "0xgas"}, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := p.extractCallAddress(&execution.StructLog{ + Op: tc.op, + Stack: &tc.stack, + }) + assert.NotNil(t, result) + assert.Equal(t, targetAddr, *result) + }) + } +} diff --git a/pkg/processor/transaction/structlog/format_address_test.go b/pkg/processor/transaction/structlog/format_address_test.go new file mode 100644 index 0000000..7b26b62 --- /dev/null +++ b/pkg/processor/transaction/structlog/format_address_test.go @@ -0,0 +1,115 @@ +package structlog + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestFormatAddress(t *testing.T) { + testCases := []struct { + name string + input string + expected string + }{ + { + name: "already 40 chars with 0x prefix", + input: "0x7a250d5630b4cf539739df2c5dacb4c659f2488d", + expected: "0x7a250d5630b4cf539739df2c5dacb4c659f2488d", + }, + { + name: "already 40 chars without 0x prefix", + input: "7a250d5630b4cf539739df2c5dacb4c659f2488d", + expected: "0x7a250d5630b4cf539739df2c5dacb4c659f2488d", + }, + { + name: "precompile address 0x1", + input: "0x1", + expected: "0x0000000000000000000000000000000000000001", + }, + { + name: "precompile address 0xa", + input: "0xa", + expected: "0x000000000000000000000000000000000000000a", + }, + { + name: "Permit2 with leading zeros truncated", + input: "0x22d473030f116ddee9f6b43ac78ba3", + expected: "0x000000000022d473030f116ddee9f6b43ac78ba3", + }, + { + name: "Uniswap PoolManager with leading zeros truncated", + input: "0x4444c5dc75cb358380d2e3de08a90", + expected: "0x000000000004444c5dc75cb358380d2e3de08a90", + }, + { + name: "zero address", + input: "0x0", + expected: "0x0000000000000000000000000000000000000000", + }, + { + name: "short address without 0x prefix", + input: "5208", + expected: "0x0000000000000000000000000000000000005208", + }, + { + name: "short 
address with 0x prefix", + input: "0x5208", + expected: "0x0000000000000000000000000000000000005208", + }, + { + name: "empty string", + input: "", + expected: "0x0000000000000000000000000000000000000000", + }, + { + name: "just 0x prefix", + input: "0x", + expected: "0x0000000000000000000000000000000000000000", + }, + // Full 32-byte stack values (66 chars) - extract lower 20 bytes + { + name: "full 32-byte stack value from XEN Batch Minter", + input: "0x661f30bf3a790c8687131ae8fc6e649df9f27275fc286db8f1a0be7e99b24bb2", + expected: "0xfc6e649df9f27275fc286db8f1a0be7e99b24bb2", + }, + { + name: "full 32-byte stack value - all zeros except address", + input: "0x0000000000000000000000007a250d5630b4cf539739df2c5dacb4c659f2488d", + expected: "0x7a250d5630b4cf539739df2c5dacb4c659f2488d", + }, + { + name: "full 32-byte stack value without 0x prefix", + input: "661f30bf3a790c8687131ae8fc6e649df9f27275fc286db8f1a0be7e99b24bb2", + expected: "0xfc6e649df9f27275fc286db8f1a0be7e99b24bb2", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := formatAddress(tc.input) + assert.Equal(t, tc.expected, result) + }) + } +} + +func TestFormatAddress_LengthConsistency(t *testing.T) { + // All formatted addresses should be exactly 42 characters (0x + 40 hex chars) + inputs := []string{ + "0x1", + "0xa", + "0xdeadbeef", + "0x7a250d5630b4cf539739df2c5dacb4c659f2488d", + "1", + "abcdef", + "", + } + + for _, input := range inputs { + t.Run(input, func(t *testing.T) { + result := formatAddress(input) + assert.Len(t, result, 42, "formatted address should always be 42 chars") + assert.Equal(t, "0x", result[:2], "formatted address should start with 0x") + }) + } +} diff --git a/pkg/processor/transaction/structlog/gas_cost.go b/pkg/processor/transaction/structlog/gas_cost.go index aa5c0fd..bcf5968 100644 --- a/pkg/processor/transaction/structlog/gas_cost.go +++ b/pkg/processor/transaction/structlog/gas_cost.go @@ -6,6 +6,86 @@ import ( "github.com/ethpandaops/execution-processor/pkg/ethereum/execution" ) +// ============================================================================= +// GAS FIELDS +// ============================================================================= +// +// The structlog contains three gas-related fields: +// +// GasCost +// Source: Directly from geth/erigon debug_traceTransaction response. +// For non-CALL opcodes: The static cost charged for the opcode. +// For CALL/CREATE opcodes: The gas stipend passed to the child frame. +// +// GasUsed +// Source: Computed as gas[i] - gas[i+1] for consecutive opcodes at same depth. +// For non-CALL opcodes: Actual gas consumed by the opcode. +// For CALL/CREATE opcodes: Includes the call overhead PLUS all child frame gas. +// Note: Summing gas_used across all opcodes double counts because CALL's +// gas_used includes child gas, and children also report their own gas_used. +// +// GasSelf +// Source: Computed as gas_used minus the sum of all child frame gas_used. +// For non-CALL opcodes: Equal to gas_used. +// For CALL/CREATE opcodes: Only the call overhead (warm/cold access, memory +// expansion, value transfer) without child frame gas. +// Summing gas_self across all opcodes gives total execution gas without +// double counting. 
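+//
+// To see why only gas_self sums cleanly, take the CALL example below, whose
+// child frame consumes 21,258 gas in total:
+// sum(gas_used) over the CALL and its children = 23,858 + 21,258 = 45,116 (child gas counted twice)
+// sum(gas_self) over the CALL and its children =  2,600 + 21,258 = 23,858 (counted once)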
+// +// Example for a CALL opcode: +// gas_cost = 7,351,321 (stipend passed to child) +// gas_used = 23,858 (overhead 2,600 + child consumed 21,258) +// gas_self = 2,600 (just the CALL overhead) +// +// ============================================================================= + +// Opcode constants for call and create operations. +const ( + OpcodeCALL = "CALL" + OpcodeCALLCODE = "CALLCODE" + OpcodeDELEGATECALL = "DELEGATECALL" + OpcodeSTATICCALL = "STATICCALL" + OpcodeCREATE = "CREATE" + OpcodeCREATE2 = "CREATE2" +) + +// hasPrecomputedGasUsed detects whether GasUsed values are pre-computed by the tracer. +// +// In embedded mode, the tracer computes GasUsed inline during trace capture, +// populating this field with non-zero values. In RPC mode, GasUsed is always 0 +// and must be computed post-hoc using ComputeGasUsed(). +// +// This enables backward compatibility: execution-processor works with both +// embedded mode (optimized, pre-computed) and RPC mode (legacy, post-computed). +func hasPrecomputedGasUsed(structlogs []execution.StructLog) bool { + if len(structlogs) == 0 { + return false + } + + // Check first structlog - if GasUsed > 0, tracer pre-computed values. + return structlogs[0].GasUsed > 0 +} + +// hasPrecomputedCreateAddresses detects whether CREATE/CREATE2 addresses are pre-computed. +// +// In embedded mode, the tracer resolves CREATE addresses inline when the constructor +// returns, populating CallToAddress. In RPC mode, CallToAddress is nil for CREATE +// opcodes and must be computed post-hoc using ComputeCreateAddresses(). +// +// Returns true if any CREATE/CREATE2 opcode has CallToAddress pre-populated. +func hasPrecomputedCreateAddresses(structlogs []execution.StructLog) bool { + for i := range structlogs { + op := structlogs[i].Op + if op == OpcodeCREATE || op == OpcodeCREATE2 { + // If any CREATE has CallToAddress populated, tracer pre-computed. + return structlogs[i].CallToAddress != nil + } + } + + // No CREATE/CREATE2 opcodes found - doesn't matter, return false to use standard path. + return false +} + // ComputeGasUsed calculates the actual gas consumed for each structlog using // the difference between consecutive gas values at the same depth level. // @@ -53,7 +133,12 @@ func ComputeGasUsed(structlogs []execution.StructLog) []uint64 { // Update gasUsed for pending log at current depth if prevIdx := pendingIdx[depth]; prevIdx >= 0 && prevIdx < len(structlogs) { - gasUsed[prevIdx] = structlogs[prevIdx].Gas - structlogs[i].Gas + // Guard against underflow: if gas values are corrupted or out of order, + // fall back to the pre-calculated GasCost instead of underflowing + if structlogs[prevIdx].Gas >= structlogs[i].Gas { + gasUsed[prevIdx] = structlogs[prevIdx].Gas - structlogs[i].Gas + } + // else: keep the fallback GasCost value set during initialization } // Store current log's index as pending at this depth @@ -62,3 +147,65 @@ func ComputeGasUsed(structlogs []execution.StructLog) []uint64 { return gasUsed } + +// ComputeGasSelf calculates the gas consumed by each opcode excluding child frame gas. +// For CALL/CREATE opcodes, this represents only the call overhead (warm/cold access, +// memory expansion, value transfer), not the gas consumed by child frames. +// For all other opcodes, this equals gasUsed. +// +// This is useful for gas analysis where you want to sum gas without double counting: +// sum(gasSelf) = total transaction execution gas (no double counting). 
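+//
+// Typical usage (mirroring ProcessTransaction in RPC mode) derives gasUsed first
+// and feeds it in:
+//
+//	gasUsed := ComputeGasUsed(trace.Structlogs)
+//	gasSelf := ComputeGasSelf(trace.Structlogs, gasUsed)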
+func ComputeGasSelf(structlogs []execution.StructLog, gasUsed []uint64) []uint64 { + if len(structlogs) == 0 { + return nil + } + + gasSelf := make([]uint64, len(structlogs)) + copy(gasSelf, gasUsed) + + for i := range structlogs { + op := structlogs[i].Op + if !isCallOrCreateOpcode(op) { + continue + } + + callDepth := structlogs[i].Depth + + var childGasSum uint64 + + // Sum gas_used for DIRECT children only (depth == callDepth + 1). + // We only sum direct children because their gas_used already includes + // any nested descendants. Summing all descendants would double count. + for j := i + 1; j < len(structlogs); j++ { + if structlogs[j].Depth <= callDepth { + break + } + + if structlogs[j].Depth == callDepth+1 { + childGasSum += gasUsed[j] + } + } + + // gasSelf = total gas attributed to this CALL minus child execution + // This gives us just the CALL overhead + if gasUsed[i] >= childGasSum { + gasSelf[i] = gasUsed[i] - childGasSum + } else { + // Edge case: if child gas exceeds parent (shouldn't happen in valid traces) + // fall back to 0 to avoid underflow + gasSelf[i] = 0 + } + } + + return gasSelf +} + +// isCallOrCreateOpcode returns true if the opcode spawns a new call frame. +func isCallOrCreateOpcode(op string) bool { + switch op { + case OpcodeCALL, OpcodeCALLCODE, OpcodeDELEGATECALL, OpcodeSTATICCALL, OpcodeCREATE, OpcodeCREATE2: + return true + default: + return false + } +} diff --git a/pkg/processor/transaction/structlog/gas_cost_test.go b/pkg/processor/transaction/structlog/gas_cost_test.go index 868d690..02dacda 100644 --- a/pkg/processor/transaction/structlog/gas_cost_test.go +++ b/pkg/processor/transaction/structlog/gas_cost_test.go @@ -9,6 +9,75 @@ import ( "github.com/ethpandaops/execution-processor/pkg/ethereum/execution" ) +// ============================================================================= +// hasPrecomputedGasUsed Tests +// ============================================================================= + +func TestHasPrecomputedGasUsed_Empty(t *testing.T) { + assert.False(t, hasPrecomputedGasUsed(nil)) + assert.False(t, hasPrecomputedGasUsed([]execution.StructLog{})) +} + +func TestHasPrecomputedGasUsed_WithGasUsed(t *testing.T) { + structlogs := []execution.StructLog{ + {Op: "PUSH1", GasUsed: 3}, + } + assert.True(t, hasPrecomputedGasUsed(structlogs)) +} + +func TestHasPrecomputedGasUsed_WithoutGasUsed(t *testing.T) { + structlogs := []execution.StructLog{ + {Op: "PUSH1", GasUsed: 0}, + } + assert.False(t, hasPrecomputedGasUsed(structlogs)) +} + +// ============================================================================= +// hasPrecomputedCreateAddresses Tests +// ============================================================================= + +func TestHasPrecomputedCreateAddresses_Empty(t *testing.T) { + assert.False(t, hasPrecomputedCreateAddresses(nil)) + assert.False(t, hasPrecomputedCreateAddresses([]execution.StructLog{})) +} + +func TestHasPrecomputedCreateAddresses_NoCreate(t *testing.T) { + structlogs := []execution.StructLog{ + {Op: "PUSH1"}, + {Op: "CALL"}, + } + assert.False(t, hasPrecomputedCreateAddresses(structlogs)) +} + +func TestHasPrecomputedCreateAddresses_CreateWithAddress(t *testing.T) { + addr := "0x1234567890123456789012345678901234567890" + structlogs := []execution.StructLog{ + {Op: "PUSH1"}, + {Op: "CREATE", CallToAddress: &addr}, + } + assert.True(t, hasPrecomputedCreateAddresses(structlogs)) +} + +func TestHasPrecomputedCreateAddresses_CreateWithoutAddress(t *testing.T) { + structlogs := []execution.StructLog{ + 
{Op: "PUSH1"}, + {Op: "CREATE", CallToAddress: nil}, + } + assert.False(t, hasPrecomputedCreateAddresses(structlogs)) +} + +func TestHasPrecomputedCreateAddresses_Create2WithAddress(t *testing.T) { + addr := "0x1234567890123456789012345678901234567890" + structlogs := []execution.StructLog{ + {Op: "CREATE2", CallToAddress: &addr}, + } + assert.True(t, hasPrecomputedCreateAddresses(structlogs)) +} + +// ============================================================================= +// ComputeGasUsed Tests +// ============================================================================= + func TestComputeGasUsed_EmptyLogs(t *testing.T) { result := ComputeGasUsed(nil) assert.Nil(t, result) @@ -313,3 +382,337 @@ func TestComputeGasUsed_LargeDepth(t *testing.T) { assert.Equal(t, uint64(2), result[8]) assert.Equal(t, uint64(2), result[9]) } + +func TestComputeGasUsed_NoUnderflow_CorruptedGasValues(t *testing.T) { + // Test that corrupted/out-of-order gas values don't cause uint64 underflow. + // In valid traces, gas[i] < gas[i-1] (gas decreases). But if a trace has + // corrupted data where gas[i] > gas[i-1], we should NOT underflow. + // + // Without the fix, this would produce values like 18,446,744,073,709,551,613 + // (near uint64 max) instead of falling back to GasCost. + structlogs := []execution.StructLog{ + {Op: "PUSH1", Gas: 100000, GasCost: 3, Depth: 1}, + {Op: "ADD", Gas: 200000, GasCost: 3, Depth: 1}, // CORRUPTED: gas increased! + {Op: "STOP", Gas: 199997, GasCost: 0, Depth: 1}, + } + + result := ComputeGasUsed(structlogs) + + require.Len(t, result, 3) + + // PUSH1: gas[0]=100000, gas[1]=200000, so 100000 - 200000 would underflow! + // The fix should fall back to GasCost (3) instead. + assert.Equal(t, uint64(3), result[0], "should fall back to GasCost, not underflow") + + // Verify it's NOT a huge underflow value + assert.Less(t, result[0], uint64(1000000), "result should be reasonable, not an underflow") + + // ADD: 200000 - 199997 = 3 (normal case, no underflow) + assert.Equal(t, uint64(3), result[1]) + + // STOP: keeps pre-calculated (last opcode) + assert.Equal(t, uint64(0), result[2]) +} + +func TestComputeGasUsed_NoUnderflow_AllCorrupted(t *testing.T) { + // Test where all gas values are corrupted (increasing instead of decreasing) + structlogs := []execution.StructLog{ + {Op: "PUSH1", Gas: 100000, GasCost: 3, Depth: 1}, + {Op: "PUSH1", Gas: 110000, GasCost: 3, Depth: 1}, // CORRUPTED + {Op: "ADD", Gas: 120000, GasCost: 3, Depth: 1}, // CORRUPTED + {Op: "STOP", Gas: 130000, GasCost: 0, Depth: 1}, // CORRUPTED + } + + result := ComputeGasUsed(structlogs) + + require.Len(t, result, 4) + + // All should fall back to GasCost since all would underflow + for i, r := range result { + assert.Less(t, r, uint64(1000000), + "result[%d] should be reasonable, not an underflow", i) + } +} + +// ============================================================================= +// ComputeGasSelf Tests +// ============================================================================= + +func TestComputeGasSelf_EmptyLogs(t *testing.T) { + result := ComputeGasSelf(nil, nil) + assert.Nil(t, result) + + result = ComputeGasSelf([]execution.StructLog{}, []uint64{}) + assert.Nil(t, result) +} + +func TestComputeGasSelf_NonCallOpcodes(t *testing.T) { + // For non-CALL opcodes, gas_self should equal gas_used + structlogs := []execution.StructLog{ + {Op: "PUSH1", Gas: 100000, GasCost: 3, Depth: 1}, + {Op: "SLOAD", Gas: 99997, GasCost: 2100, Depth: 1}, + {Op: "ADD", Gas: 97897, GasCost: 3, Depth: 1}, + } + + 
gasUsed := []uint64{3, 2100, 3} + + result := ComputeGasSelf(structlogs, gasUsed) + + require.Len(t, result, 3) + assert.Equal(t, uint64(3), result[0], "PUSH1 gas_self should equal gas_used") + assert.Equal(t, uint64(2100), result[1], "SLOAD gas_self should equal gas_used") + assert.Equal(t, uint64(3), result[2], "ADD gas_self should equal gas_used") +} + +func TestComputeGasSelf_SimpleCall(t *testing.T) { + // CALL at depth 1 with child opcodes at depth 2 + // gas_self for CALL should be gas_used minus sum of direct children's gas_used + structlogs := []execution.StructLog{ + {Op: "PUSH1", Gas: 100000, GasCost: 3, Depth: 1}, // index 0 + {Op: "CALL", Gas: 99997, GasCost: 100, Depth: 1}, // index 1: CALL + {Op: "PUSH1", Gas: 63000, GasCost: 3, Depth: 2}, // index 2: child + {Op: "ADD", Gas: 62000, GasCost: 3, Depth: 2}, // index 3: child + {Op: "STOP", Gas: 61000, GasCost: 0, Depth: 2}, // index 4: child + {Op: "POP", Gas: 97000, GasCost: 2, Depth: 1}, // index 5: back to parent + } + + // gas_used values (computed by ComputeGasUsed logic): + // PUSH1[0]: 100000 - 99997 = 3 + // CALL[1]: 99997 - 97000 = 2997 (includes child execution) + // PUSH1[2]: 63000 - 62000 = 1000 + // ADD[3]: 62000 - 61000 = 1000 + // STOP[4]: 0 (pre-calculated, last at depth 2) + // POP[5]: 2 (pre-calculated, last opcode) + gasUsed := []uint64{3, 2997, 1000, 1000, 0, 2} + + result := ComputeGasSelf(structlogs, gasUsed) + + require.Len(t, result, 6) + + // Non-CALL opcodes: gas_self == gas_used + assert.Equal(t, uint64(3), result[0], "PUSH1 gas_self") + assert.Equal(t, uint64(1000), result[2], "child PUSH1 gas_self") + assert.Equal(t, uint64(1000), result[3], "child ADD gas_self") + assert.Equal(t, uint64(0), result[4], "child STOP gas_self") + assert.Equal(t, uint64(2), result[5], "POP gas_self") + + // CALL: gas_self = gas_used - sum(direct children) + // direct children at depth 2: indices 2, 3, 4 + // sum = 1000 + 1000 + 0 = 2000 + // gas_self = 2997 - 2000 = 997 + assert.Equal(t, uint64(997), result[1], "CALL gas_self should be overhead only") +} + +func TestComputeGasSelf_NestedCalls(t *testing.T) { + // This is the critical test: nested CALLs where we must only sum direct children. + // If we sum ALL descendants, we double count and get incorrect (often 0) values. 
+ // + // Structure: + // CALL A (depth 1) -> child frame at depth 2 + // ├─ PUSH (depth 2) + // ├─ CALL B (depth 2) -> grandchild frame at depth 3 + // │ ├─ ADD (depth 3) + // │ └─ STOP (depth 3) + // └─ STOP (depth 2) + structlogs := []execution.StructLog{ + {Op: "CALL", Gas: 100000, GasCost: 100, Depth: 1}, // index 0: CALL A + {Op: "PUSH1", Gas: 80000, GasCost: 3, Depth: 2}, // index 1: direct child of A + {Op: "CALL", Gas: 79000, GasCost: 100, Depth: 2}, // index 2: CALL B (direct child of A) + {Op: "ADD", Gas: 50000, GasCost: 3, Depth: 3}, // index 3: direct child of B + {Op: "STOP", Gas: 49000, GasCost: 0, Depth: 3}, // index 4: direct child of B + {Op: "STOP", Gas: 75000, GasCost: 0, Depth: 2}, // index 5: direct child of A + {Op: "POP", Gas: 90000, GasCost: 2, Depth: 1}, // index 6: back to depth 1 + } + + // gas_used values: + // CALL A[0]: 100000 - 90000 = 10000 (includes all nested) + // PUSH[1]: 80000 - 79000 = 1000 + // CALL B[2]: 79000 - 75000 = 4000 (includes grandchild) + // ADD[3]: 50000 - 49000 = 1000 + // STOP[4]: 0 (pre-calculated) + // STOP[5]: 0 (pre-calculated) + // POP[6]: 2 (pre-calculated) + gasUsed := []uint64{10000, 1000, 4000, 1000, 0, 0, 2} + + result := ComputeGasSelf(structlogs, gasUsed) + + require.Len(t, result, 7) + + // CALL A: direct children at depth 2 are indices 1, 2, 5 + // sum of direct children = 1000 + 4000 + 0 = 5000 + // gas_self = 10000 - 5000 = 5000 + // Note: We do NOT include indices 3, 4 (depth 3) because they're grandchildren, + // and CALL B's gas_used (4000) already includes them. + assert.Equal(t, uint64(5000), result[0], "CALL A gas_self should exclude nested CALL's children") + + // CALL B: direct children at depth 3 are indices 3, 4 + // sum of direct children = 1000 + 0 = 1000 + // gas_self = 4000 - 1000 = 3000 + assert.Equal(t, uint64(3000), result[2], "CALL B gas_self should be its overhead") + + // Non-CALL opcodes: gas_self == gas_used + assert.Equal(t, uint64(1000), result[1], "PUSH gas_self") + assert.Equal(t, uint64(1000), result[3], "ADD gas_self") + assert.Equal(t, uint64(0), result[4], "STOP depth 3 gas_self") + assert.Equal(t, uint64(0), result[5], "STOP depth 2 gas_self") + assert.Equal(t, uint64(2), result[6], "POP gas_self") +} + +func TestComputeGasSelf_SiblingCalls(t *testing.T) { + // Two sibling CALLs at the same depth, each with their own children + structlogs := []execution.StructLog{ + {Op: "CALL", Gas: 100000, GasCost: 100, Depth: 1}, // index 0: first CALL + {Op: "ADD", Gas: 60000, GasCost: 3, Depth: 2}, // index 1: child of first CALL + {Op: "STOP", Gas: 59000, GasCost: 0, Depth: 2}, // index 2: child of first CALL + {Op: "CALL", Gas: 90000, GasCost: 100, Depth: 1}, // index 3: second CALL + {Op: "MUL", Gas: 50000, GasCost: 5, Depth: 2}, // index 4: child of second CALL + {Op: "STOP", Gas: 49000, GasCost: 0, Depth: 2}, // index 5: child of second CALL + {Op: "POP", Gas: 80000, GasCost: 2, Depth: 1}, // index 6 + } + + // gas_used: + // CALL[0]: 100000 - 90000 = 10000 + // ADD[1]: 60000 - 59000 = 1000 + // STOP[2]: 0 + // CALL[3]: 90000 - 80000 = 10000 + // MUL[4]: 50000 - 49000 = 1000 + // STOP[5]: 0 + // POP[6]: 2 + gasUsed := []uint64{10000, 1000, 0, 10000, 1000, 0, 2} + + result := ComputeGasSelf(structlogs, gasUsed) + + require.Len(t, result, 7) + + // First CALL: direct children = indices 1, 2 + // gas_self = 10000 - (1000 + 0) = 9000 + assert.Equal(t, uint64(9000), result[0], "first CALL gas_self") + + // Second CALL: direct children = indices 4, 5 + // gas_self = 10000 - (1000 + 0) = 9000 + 
assert.Equal(t, uint64(9000), result[3], "second CALL gas_self") +} + +func TestComputeGasSelf_CreateOpcode(t *testing.T) { + // CREATE should be handled the same as CALL + structlogs := []execution.StructLog{ + {Op: "CREATE", Gas: 100000, GasCost: 32000, Depth: 1}, // index 0 + {Op: "PUSH1", Gas: 70000, GasCost: 3, Depth: 2}, // index 1: constructor + {Op: "RETURN", Gas: 69000, GasCost: 0, Depth: 2}, // index 2: constructor + {Op: "POP", Gas: 80000, GasCost: 2, Depth: 1}, // index 3 + } + + // gas_used: + // CREATE[0]: 100000 - 80000 = 20000 + // PUSH[1]: 70000 - 69000 = 1000 + // RETURN[2]: 0 + // POP[3]: 2 + gasUsed := []uint64{20000, 1000, 0, 2} + + result := ComputeGasSelf(structlogs, gasUsed) + + require.Len(t, result, 4) + + // CREATE: direct children = indices 1, 2 + // gas_self = 20000 - (1000 + 0) = 19000 + assert.Equal(t, uint64(19000), result[0], "CREATE gas_self should be overhead only") + assert.Equal(t, uint64(1000), result[1], "PUSH gas_self") + assert.Equal(t, uint64(0), result[2], "RETURN gas_self") + assert.Equal(t, uint64(2), result[3], "POP gas_self") +} + +func TestComputeGasSelf_DelegateCallAndStaticCall(t *testing.T) { + // DELEGATECALL and STATICCALL should also be handled + structlogs := []execution.StructLog{ + {Op: "DELEGATECALL", Gas: 100000, GasCost: 100, Depth: 1}, + {Op: "ADD", Gas: 60000, GasCost: 3, Depth: 2}, + {Op: "STOP", Gas: 59000, GasCost: 0, Depth: 2}, + {Op: "STATICCALL", Gas: 90000, GasCost: 100, Depth: 1}, + {Op: "MUL", Gas: 50000, GasCost: 5, Depth: 2}, + {Op: "STOP", Gas: 49000, GasCost: 0, Depth: 2}, + {Op: "POP", Gas: 80000, GasCost: 2, Depth: 1}, + } + + gasUsed := []uint64{10000, 1000, 0, 10000, 1000, 0, 2} + + result := ComputeGasSelf(structlogs, gasUsed) + + require.Len(t, result, 7) + + // DELEGATECALL: gas_self = 10000 - 1000 = 9000 + assert.Equal(t, uint64(9000), result[0], "DELEGATECALL gas_self") + + // STATICCALL: gas_self = 10000 - 1000 = 9000 + assert.Equal(t, uint64(9000), result[3], "STATICCALL gas_self") +} + +func TestComputeGasSelf_CallWithNoChildren(t *testing.T) { + // CALL to precompile or empty contract - no child opcodes + // In this case, gas_self should equal gas_used + structlogs := []execution.StructLog{ + {Op: "CALL", Gas: 100000, GasCost: 100, Depth: 1}, + {Op: "POP", Gas: 97400, GasCost: 2, Depth: 1}, // immediately back at depth 1 + } + + // gas_used: + // CALL: 100000 - 97400 = 2600 (just the CALL overhead, no child execution) + // POP: 2 + gasUsed := []uint64{2600, 2} + + result := ComputeGasSelf(structlogs, gasUsed) + + require.Len(t, result, 2) + + // No children, so gas_self = gas_used + assert.Equal(t, uint64(2600), result[0], "CALL with no children: gas_self == gas_used") + assert.Equal(t, uint64(2), result[1], "POP gas_self") +} + +func TestComputeGasSelf_DeeplyNestedCalls(t *testing.T) { + // Test 4 levels of nesting to ensure correct handling + structlogs := []execution.StructLog{ + {Op: "CALL", Gas: 100000, GasCost: 100, Depth: 1}, // index 0: A + {Op: "CALL", Gas: 90000, GasCost: 100, Depth: 2}, // index 1: B + {Op: "CALL", Gas: 80000, GasCost: 100, Depth: 3}, // index 2: C + {Op: "CALL", Gas: 70000, GasCost: 100, Depth: 4}, // index 3: D + {Op: "ADD", Gas: 60000, GasCost: 3, Depth: 5}, // index 4: innermost + {Op: "STOP", Gas: 59000, GasCost: 0, Depth: 5}, // index 5 + {Op: "STOP", Gas: 65000, GasCost: 0, Depth: 4}, // index 6 + {Op: "STOP", Gas: 74000, GasCost: 0, Depth: 3}, // index 7 + {Op: "STOP", Gas: 83000, GasCost: 0, Depth: 2}, // index 8 + {Op: "POP", Gas: 92000, GasCost: 2, Depth: 1}, // 
index 9 + } + + // gas_used: + // A[0]: 100000 - 92000 = 8000 + // B[1]: 90000 - 83000 = 7000 + // C[2]: 80000 - 74000 = 6000 + // D[3]: 70000 - 65000 = 5000 + // ADD[4]: 60000 - 59000 = 1000 + // STOP[5]: 0 + // STOP[6]: 0 + // STOP[7]: 0 + // STOP[8]: 0 + // POP[9]: 2 + gasUsed := []uint64{8000, 7000, 6000, 5000, 1000, 0, 0, 0, 0, 2} + + result := ComputeGasSelf(structlogs, gasUsed) + + require.Len(t, result, 10) + + // CALL A: direct children at depth 2 = [B, STOP] = indices 1, 8 + // gas_self = 8000 - (7000 + 0) = 1000 + assert.Equal(t, uint64(1000), result[0], "CALL A gas_self") + + // CALL B: direct children at depth 3 = [C, STOP] = indices 2, 7 + // gas_self = 7000 - (6000 + 0) = 1000 + assert.Equal(t, uint64(1000), result[1], "CALL B gas_self") + + // CALL C: direct children at depth 4 = [D, STOP] = indices 3, 6 + // gas_self = 6000 - (5000 + 0) = 1000 + assert.Equal(t, uint64(1000), result[2], "CALL C gas_self") + + // CALL D: direct children at depth 5 = [ADD, STOP] = indices 4, 5 + // gas_self = 5000 - (1000 + 0) = 4000 + assert.Equal(t, uint64(4000), result[3], "CALL D gas_self") +} diff --git a/pkg/processor/transaction/structlog/processor.go b/pkg/processor/transaction/structlog/processor.go index 5a2b66c..8abcf9b 100644 --- a/pkg/processor/transaction/structlog/processor.go +++ b/pkg/processor/transaction/structlog/processor.go @@ -4,17 +4,27 @@ import ( "context" "fmt" "math" + "math/big" + "sync" + "time" + "github.com/ClickHouse/ch-go" "github.com/hibiken/asynq" "github.com/redis/go-redis/v9" "github.com/sirupsen/logrus" "github.com/ethpandaops/execution-processor/pkg/clickhouse" + "github.com/ethpandaops/execution-processor/pkg/common" "github.com/ethpandaops/execution-processor/pkg/ethereum" "github.com/ethpandaops/execution-processor/pkg/processor/tracker" + "github.com/ethpandaops/execution-processor/pkg/rowbuffer" "github.com/ethpandaops/execution-processor/pkg/state" ) +const ( + metricsUpdateInterval = 15 * time.Second +) + // Compile-time interface compliance check. var _ tracker.BlockProcessor = (*Processor)(nil) @@ -42,8 +52,17 @@ type Processor struct { redisPrefix string pendingTracker *tracker.PendingTracker + // Row buffer for batched ClickHouse inserts + rowBuffer *rowbuffer.Buffer[Structlog] + // Embedded limiter for shared blocking/completion logic *tracker.Limiter + + // Background metrics worker fields + metricsStop chan struct{} + metricsWg sync.WaitGroup + metricsStarted bool + metricsStartedMu sync.Mutex } // New creates a new transaction structlog processor. 
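// Illustrative sketch of how the rowBuffer field added above is wired up by the
// hunks below; it is based only on the calls visible in this file, and the
// rowbuffer package internals are assumed rather than shown:
//
//	buf := rowbuffer.New(rowbuffer.Config{
//		MaxRows:       config.BufferMaxRows,       // flush once this many rows accumulate
//		FlushInterval: config.BufferFlushInterval, // or after this long, whichever comes first
//		Network:       deps.Network.Name,
//		Processor:     ProcessorName,
//		Table:         config.Table,
//	}, p.flushRows, log)
//	_ = buf.Start(ctx)              // begin background flushing
//	_ = buf.Submit(ctx, structlogs) // called from insertStructlogs per transaction
//	_ = buf.Stop(ctx)               // flushes any remaining rows during shutdown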
@@ -63,6 +82,15 @@ func New(deps *Dependencies, config *Config) (*Processor, error) { config.MaxPendingBlockRange = tracker.DefaultMaxPendingBlockRange } + // Set buffer defaults + if config.BufferMaxRows <= 0 { + config.BufferMaxRows = DefaultBufferMaxRows + } + + if config.BufferFlushInterval <= 0 { + config.BufferFlushInterval = DefaultBufferFlushInterval + } + log := deps.Log.WithField("processor", ProcessorName) pendingTracker := tracker.NewPendingTracker(deps.RedisClient, deps.RedisPrefix, log) @@ -95,9 +123,24 @@ func New(deps *Dependencies, config *Config) (*Processor, error) { processor.network = deps.Network + // Create the row buffer with the flush function + processor.rowBuffer = rowbuffer.New( + rowbuffer.Config{ + MaxRows: config.BufferMaxRows, + FlushInterval: config.BufferFlushInterval, + Network: deps.Network.Name, + Processor: ProcessorName, + Table: config.Table, + }, + processor.flushRows, + log, + ) + processor.log.WithFields(logrus.Fields{ "network": processor.network.Name, "max_pending_block_range": config.MaxPendingBlockRange, + "buffer_max_rows": config.BufferMaxRows, + "buffer_flush_interval": config.BufferFlushInterval, }).Info("Detected network") return processor, nil @@ -110,6 +153,14 @@ func (p *Processor) Start(ctx context.Context) error { return fmt.Errorf("failed to start ClickHouse client: %w", err) } + // Start the row buffer + if err := p.rowBuffer.Start(ctx); err != nil { + return fmt.Errorf("failed to start row buffer: %w", err) + } + + // Start the background metrics worker + p.startMetricsWorker() + p.log.Info("Transaction structlog processor ready") return nil @@ -119,6 +170,14 @@ func (p *Processor) Start(ctx context.Context) error { func (p *Processor) Stop(ctx context.Context) error { p.log.Info("Stopping transaction structlog processor") + // Stop the background metrics worker + p.stopMetricsWorker() + + // Stop the row buffer first (flushes remaining rows) + if err := p.rowBuffer.Stop(ctx); err != nil { + p.log.WithError(err).Error("Failed to stop row buffer") + } + // Stop the ClickHouse client return p.clickhouse.Stop() } @@ -174,3 +233,148 @@ func (p *Processor) getProcessForwardsQueue() string { func (p *Processor) getProcessBackwardsQueue() string { return tracker.PrefixedProcessBackwardsQueue(ProcessorName, p.redisPrefix) } + +// flushRows is the flush function for the row buffer. 
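+// It converts the buffered Structlog rows into the columnar proto input via
+// Columns.Append, issues a single INSERT into p.config.Table bounded by
+// tracker.DefaultClickHouseTimeout, and records the row count in the
+// ClickHouseInsertsRows metric with a "success" or "failed" label.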
+func (p *Processor) flushRows(ctx context.Context, rows []Structlog) error { + if len(rows) == 0 { + return nil + } + + // Add timeout for ClickHouse operation + insertCtx, cancel := context.WithTimeout(ctx, tracker.DefaultClickHouseTimeout) + defer cancel() + + cols := NewColumns() + + for _, sl := range rows { + cols.Append( + sl.UpdatedDateTime.Time(), + sl.BlockNumber, + sl.TransactionHash, + sl.TransactionIndex, + sl.TransactionGas, + sl.TransactionFailed, + sl.TransactionReturnValue, + sl.Index, + sl.Operation, + sl.Gas, + sl.GasCost, + sl.GasUsed, + sl.GasSelf, + sl.Depth, + sl.ReturnData, + sl.Refund, + sl.Error, + sl.CallToAddress, + sl.CallFrameID, + sl.CallFramePath, + sl.MetaNetworkName, + ) + } + + input := cols.Input() + + if err := p.clickhouse.Do(insertCtx, ch.Query{ + Body: input.Into(p.config.Table), + Input: input, + }); err != nil { + common.ClickHouseInsertsRows.WithLabelValues( + p.network.Name, ProcessorName, p.config.Table, "failed", "", + ).Add(float64(len(rows))) + + return fmt.Errorf("failed to insert structlogs: %w", err) + } + + common.ClickHouseInsertsRows.WithLabelValues( + p.network.Name, ProcessorName, p.config.Table, "success", "", + ).Add(float64(len(rows))) + + return nil +} + +// insertStructlogs submits structlogs to the row buffer for batched insertion. +func (p *Processor) insertStructlogs(ctx context.Context, structlogs []Structlog) error { + return p.rowBuffer.Submit(ctx, structlogs) +} + +// startMetricsWorker starts the background metrics update worker. +func (p *Processor) startMetricsWorker() { + p.metricsStartedMu.Lock() + defer p.metricsStartedMu.Unlock() + + if p.metricsStarted { + return + } + + p.metricsStarted = true + p.metricsStop = make(chan struct{}) + p.metricsWg.Add(1) + + go p.runMetricsWorker() +} + +// stopMetricsWorker stops the background metrics update worker. +func (p *Processor) stopMetricsWorker() { + p.metricsStartedMu.Lock() + defer p.metricsStartedMu.Unlock() + + if !p.metricsStarted { + return + } + + close(p.metricsStop) + p.metricsWg.Wait() + p.metricsStarted = false +} + +// runMetricsWorker runs the background metrics update loop. +func (p *Processor) runMetricsWorker() { + defer p.metricsWg.Done() + + ticker := time.NewTicker(metricsUpdateInterval) + defer ticker.Stop() + + // Do initial update + p.updateMetricsBackground() + + for { + select { + case <-p.metricsStop: + return + case <-ticker.C: + p.updateMetricsBackground() + } + } +} + +// updateMetricsBackground updates expensive metrics in the background. 
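+// It refreshes the BlocksStored min/max gauges from the state manager and the
+// HeadDistance gauge from the current execution head, bounded by a 30 second
+// timeout; failures are logged at debug level and never interrupt processing.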
+func (p *Processor) updateMetricsBackground() { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + // Update blocks stored min/max + minBlock, maxBlock, err := p.stateManager.GetMinMaxStoredBlocks(ctx, p.network.Name, ProcessorName) + if err != nil { + p.log.WithError(err).WithField("network", p.network.Name).Debug("failed to get min/max stored blocks") + } else if minBlock != nil && maxBlock != nil { + common.BlocksStored.WithLabelValues(p.network.Name, ProcessorName, "min").Set(float64(minBlock.Int64())) + common.BlocksStored.WithLabelValues(p.network.Name, ProcessorName, "max").Set(float64(maxBlock.Int64())) + } + + // Update head distance metric + node := p.pool.GetHealthyExecutionNode() + + if node != nil { + if latestBlockNum, err := node.BlockNumber(ctx); err == nil && latestBlockNum != nil { + executionHead := new(big.Int).SetUint64(*latestBlockNum) + + distance, headType, err := p.stateManager.GetHeadDistance(ctx, ProcessorName, p.network.Name, p.processingMode, executionHead) + if err != nil { + p.log.WithError(err).Debug("Failed to calculate head distance in background metrics") + common.HeadDistance.WithLabelValues(p.network.Name, ProcessorName, "error").Set(-1) + } else { + common.HeadDistance.WithLabelValues(p.network.Name, ProcessorName, headType).Set(float64(distance)) + } + } + } +} diff --git a/pkg/processor/transaction/structlog/processor_test.go b/pkg/processor/transaction/structlog/processor_test.go index 571039e..1137a0c 100644 --- a/pkg/processor/transaction/structlog/processor_test.go +++ b/pkg/processor/transaction/structlog/processor_test.go @@ -22,7 +22,6 @@ func TestProcessor_Creation(t *testing.T) { Config: clickhouse.Config{ Addr: "localhost:9000", }, - ChunkSize: 10000, } // Test config validation @@ -44,7 +43,6 @@ func TestProcessor_ConfigValidation(t *testing.T) { Config: clickhouse.Config{ Addr: "localhost:9000", }, - ChunkSize: 10000, }, expectError: false, }, @@ -58,9 +56,8 @@ func TestProcessor_ConfigValidation(t *testing.T) { { name: "missing addr", config: transaction_structlog.Config{ - Enabled: true, - Table: "test_table", - ChunkSize: 10000, + Enabled: true, + Table: "test_table", }, expectError: true, }, @@ -71,7 +68,6 @@ func TestProcessor_ConfigValidation(t *testing.T) { Config: clickhouse.Config{ Addr: "localhost:9000", }, - ChunkSize: 10000, }, expectError: true, }, @@ -104,7 +100,6 @@ func TestProcessor_ConcurrentConfigValidation(t *testing.T) { Config: clickhouse.Config{ Addr: "localhost:9000", }, - ChunkSize: 10000, } results <- cfg.Validate() }() @@ -150,16 +145,18 @@ func TestStructlogCountReturn(t *testing.T) { mockTrace.Failed, // txFailed mockTrace.ReturnValue, // txReturnValue uint32(i), // index - structLog.PC, // pc structLog.Op, // op structLog.Gas, // gas structLog.GasCost, // gasCost structLog.GasCost, // gasUsed (simplified) + structLog.GasCost, // gasSelf (simplified) structLog.Depth, // depth structLog.ReturnData, // returnData structLog.Refund, // refund structLog.Error, // error nil, // callTo + uint32(0), // callFrameID + []uint32{}, // callFramePath "test", // network ) } @@ -228,53 +225,28 @@ func TestMemoryManagement(t *testing.T) { uint64(i), // blockNumber "0x1234567890abcdef1234567890abcdef12345678", // txHash uint32(i%100), // txIndex - 21000, // txGas + uint64(21000), // txGas false, // txFailed nil, // txReturnValue uint32(i), // index - uint32(i*2), // pc "SSTORE", // op uint64(21000-i), // gas - 5000, // gasCost - 5000, // gasUsed - 1, // depth + uint64(5000), // 
gasCost + uint64(5000), // gasUsed + uint64(5000), // gasSelf + uint64(1), // depth nil, // returnData nil, // refund nil, // error nil, // callTo + uint32(0), // callFrameID + []uint32{}, // callFramePath "mainnet", // network ) } assert.Equal(t, rowCount, cols.Rows(), "Should have correct row count") - // Test that chunking calculations work properly - const chunkSize = 100 - - expectedChunks := (rowCount + chunkSize - 1) / chunkSize - - // Verify chunking logic - actualChunks := 0 - - for i := 0; i < rowCount; i += chunkSize { - actualChunks++ - - end := i + chunkSize - if end > rowCount { - end = rowCount - } - - // Verify chunk size constraints - chunkLen := end - i - if chunkLen <= 0 || chunkLen > chunkSize { - t.Errorf("Invalid chunk size: %d (expected 1-%d)", chunkLen, chunkSize) - } - } - - if actualChunks != expectedChunks { - t.Errorf("Expected %d chunks, got %d", expectedChunks, actualChunks) - } - // Reset columns to free memory cols.Reset() assert.Equal(t, 0, cols.Rows(), "Reset should clear all rows") @@ -298,86 +270,6 @@ func TestMemoryManagement(t *testing.T) { } } -func TestChunkProcessing(t *testing.T) { - tests := []struct { - name string - inputSize int - expectedChunks int - chunkSize int - }{ - { - name: "small input", - inputSize: 50, - expectedChunks: 1, - chunkSize: 100, - }, - { - name: "exact chunk size", - inputSize: 100, - expectedChunks: 1, - chunkSize: 100, - }, - { - name: "multiple chunks", - inputSize: 250, - expectedChunks: 3, - chunkSize: 100, - }, - { - name: "large input", - inputSize: 1500, - expectedChunks: 15, - chunkSize: 100, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - // Test chunking logic using Columns - cols := transaction_structlog.NewColumns() - now := time.Now() - - // Fill columns with test data - for i := 0; i < tt.inputSize; i++ { - cols.Append( - now, uint64(i), "0xtest", 0, 21000, false, nil, - uint32(i), uint32(i), "PUSH1", 20000, 3, 3, 1, - nil, nil, nil, nil, "test", - ) - } - - assert.Equal(t, tt.inputSize, cols.Rows(), "Should have correct row count") - - // Calculate expected chunks - expectedChunks := (tt.inputSize + tt.chunkSize - 1) / tt.chunkSize - - if expectedChunks != tt.expectedChunks { - t.Errorf("Expected %d chunks for %d items, got %d", tt.expectedChunks, tt.inputSize, expectedChunks) - } - - // Test that the chunking logic would work correctly - chunkCount := 0 - - for i := 0; i < tt.inputSize; i += tt.chunkSize { - chunkCount++ - - end := i + tt.chunkSize - if end > tt.inputSize { - end = tt.inputSize - } - // Verify chunk boundaries - if end <= i { - t.Errorf("Invalid chunk boundaries: start=%d, end=%d", i, end) - } - } - - if chunkCount != tt.expectedChunks { - t.Errorf("Chunking produced %d chunks, expected %d", chunkCount, tt.expectedChunks) - } - }) - } -} - func TestColumnsAppendAndReset(t *testing.T) { cols := transaction_structlog.NewColumns() now := time.Now() @@ -390,9 +282,9 @@ func TestColumnsAppendAndReset(t *testing.T) { num := uint64(42) cols.Append( - now, 100, "0xabc", 0, 21000, false, &str, - 0, 100, "PUSH1", 20000, 3, 3, 1, - nil, &num, nil, nil, "mainnet", + now, uint64(100), "0xabc", uint32(0), uint64(21000), false, &str, + uint32(0), "PUSH1", uint64(20000), uint64(3), uint64(3), uint64(3), uint64(1), + nil, &num, nil, nil, uint32(0), []uint32{}, "mainnet", ) assert.Equal(t, 1, cols.Rows()) @@ -400,9 +292,9 @@ func TestColumnsAppendAndReset(t *testing.T) { // Append more rows for i := 0; i < 99; i++ { cols.Append( - now, 100, "0xabc", 0, 21000, false, nil, - 
uint32(i+1), 100, "PUSH1", 20000, 3, 3, 1, - nil, nil, nil, nil, "mainnet", + now, uint64(100), "0xabc", uint32(0), uint64(21000), false, nil, + uint32(i+1), "PUSH1", uint64(20000), uint64(3), uint64(3), uint64(3), uint64(1), + nil, nil, nil, nil, uint32(0), []uint32{}, "mainnet", ) } @@ -417,10 +309,10 @@ func TestColumnsInput(t *testing.T) { cols := transaction_structlog.NewColumns() input := cols.Input() - // Verify all 19 columns are present - assert.Len(t, input, 19) + // Verify all 21 columns are present + assert.Len(t, input, 21) assert.Equal(t, "updated_date_time", input[0].Name) - assert.Equal(t, "meta_network_name", input[18].Name) + assert.Equal(t, "meta_network_name", input[20].Name) } // Tests from tasks_test.go diff --git a/pkg/processor/transaction/structlog/transaction_processing.go b/pkg/processor/transaction/structlog/transaction_processing.go index 6902d18..ff10c9a 100644 --- a/pkg/processor/transaction/structlog/transaction_processing.go +++ b/pkg/processor/transaction/structlog/transaction_processing.go @@ -3,20 +3,149 @@ package structlog import ( "context" "fmt" - "io" + "strings" "time" - "github.com/ClickHouse/ch-go" - "github.com/ethereum/go-ethereum/core/types" - "github.com/sirupsen/logrus" - - "github.com/ethpandaops/execution-processor/pkg/common" + pcommon "github.com/ethpandaops/execution-processor/pkg/common" "github.com/ethpandaops/execution-processor/pkg/ethereum/execution" "github.com/ethpandaops/execution-processor/pkg/processor/tracker" ) -// ProcessTransaction processes a transaction using ch-go columnar streaming. -func (p *Processor) ProcessTransaction(ctx context.Context, block *types.Block, index int, tx *types.Transaction) (int, error) { +// Structlog represents a single EVM opcode execution within a transaction trace. +// See gas_cost.go for detailed documentation on the gas fields. +// +//nolint:tagliatelle // ClickHouse uses snake_case column names +type Structlog struct { + UpdatedDateTime ClickHouseTime `json:"updated_date_time"` + BlockNumber uint64 `json:"block_number"` + TransactionHash string `json:"transaction_hash"` + TransactionIndex uint32 `json:"transaction_index"` + TransactionGas uint64 `json:"transaction_gas"` + TransactionFailed bool `json:"transaction_failed"` + TransactionReturnValue *string `json:"transaction_return_value"` + Index uint32 `json:"index"` + Operation string `json:"operation"` + + // Gas is the remaining gas before this opcode executes. + Gas uint64 `json:"gas"` + + // GasCost is from the execution node trace. For CALL/CREATE opcodes, this is the + // gas stipend passed to the child frame, not the call overhead. + GasCost uint64 `json:"gas_cost"` + + // GasUsed is computed as gas[i] - gas[i+1] at the same depth level. + // For CALL/CREATE opcodes, this includes the call overhead plus all child frame gas. + // Summing across all opcodes will double count child frame gas. + GasUsed uint64 `json:"gas_used"` + + // GasSelf excludes child frame gas. For CALL/CREATE opcodes, this is just the call + // overhead (warm/cold access, memory expansion). For other opcodes, equals GasUsed. + // Summing across all opcodes gives total execution gas without double counting. 
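+	// Example from gas_cost.go: a CALL whose own overhead is 2,600 and whose child
+	// frame consumes 21,258 has GasUsed 23,858 but GasSelf 2,600.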
+ GasSelf uint64 `json:"gas_self"` + + Depth uint64 `json:"depth"` + ReturnData *string `json:"return_data"` + Refund *uint64 `json:"refund"` + Error *string `json:"error"` + CallToAddress *string `json:"call_to_address"` + CallFrameID uint32 `json:"call_frame_id"` + CallFramePath []uint32 `json:"call_frame_path"` + MetaNetworkName string `json:"meta_network_name"` +} + +// isCallOpcode returns true if the opcode initiates a call that creates a child frame. +// Note: CREATE/CREATE2 always execute code (constructor), so they always increase depth. +// CALL-type opcodes may target EOAs (no code) or precompiles (special handling). +func isCallOpcode(op string) bool { + switch op { + case OpcodeCALL, OpcodeCALLCODE, OpcodeDELEGATECALL, OpcodeSTATICCALL: + return true + default: + return false + } +} + +// precompileAddresses contains all known EVM precompile addresses. +// +// Precompile calls don't appear in trace_transaction results (unlike EOA calls which do). +// This is used to distinguish EOA calls from precompile calls when depth doesn't increase. +// +// Note: Low addresses like 0x5c, 0x60, etc. are NOT precompiles - they're real EOAs/contracts +// deployed early in Ethereum's history. Only the addresses below are actual precompiles. +// +// Addresses sourced from go-ethereum PrecompiledContractsOsaka (superset of all forks): +// https://github.com/ethereum/go-ethereum/blob/master/core/vm/contracts.go +// +// We cannot import go-ethereum directly because it depends on github.com/holiman/bloomfilter/v2, +// which conflicts with Erigon's fork (github.com/AskAlexSharov/bloomfilter/v2) when this package +// is embedded in Erigon. The two bloomfilter versions have incompatible APIs. +var precompileAddresses = map[string]bool{ + "0x0000000000000000000000000000000000000001": true, // ecrecover + "0x0000000000000000000000000000000000000002": true, // sha256 + "0x0000000000000000000000000000000000000003": true, // ripemd160 + "0x0000000000000000000000000000000000000004": true, // identity (dataCopy) + "0x0000000000000000000000000000000000000005": true, // modexp (bigModExp) + "0x0000000000000000000000000000000000000006": true, // bn256Add (ecAdd) + "0x0000000000000000000000000000000000000007": true, // bn256ScalarMul (ecMul) + "0x0000000000000000000000000000000000000008": true, // bn256Pairing (ecPairing) + "0x0000000000000000000000000000000000000009": true, // blake2f + "0x000000000000000000000000000000000000000a": true, // kzgPointEvaluation (EIP-4844, Cancun) + "0x000000000000000000000000000000000000000b": true, // bls12381G1Add (EIP-2537, Osaka) + "0x000000000000000000000000000000000000000c": true, // bls12381G1MultiExp (EIP-2537, Osaka) + "0x000000000000000000000000000000000000000d": true, // bls12381G2Add (EIP-2537, Osaka) + "0x000000000000000000000000000000000000000e": true, // bls12381G2MultiExp (EIP-2537, Osaka) + "0x000000000000000000000000000000000000000f": true, // bls12381Pairing (EIP-2537, Osaka) + "0x0000000000000000000000000000000000000010": true, // bls12381MapG1 (EIP-2537, Osaka) + "0x0000000000000000000000000000000000000011": true, // bls12381MapG2 (EIP-2537, Osaka) + "0x0000000000000000000000000000000000000100": true, // p256Verify (EIP-7212, Osaka) +} + +// isPrecompile returns true if the address is a known EVM precompile. +// Precompile calls don't appear in trace_transaction results (unlike EOA calls which do). 
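+// The input may be short or unpadded hex (e.g. "0x1" taken from the stack); it is
+// normalized to a lowercase, 0x-prefixed, 40-hex-character form before lookup.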
+func isPrecompile(addr string) bool { + // Normalize to lowercase with 0x prefix and full 40 hex chars + hex := strings.TrimPrefix(strings.ToLower(addr), "0x") + + for len(hex) < 40 { + hex = "0" + hex + } + + return precompileAddresses["0x"+hex] +} + +// ProcessSingleTransaction processes a single transaction and inserts its structlogs directly to ClickHouse. +func (p *Processor) ProcessSingleTransaction(ctx context.Context, block execution.Block, index int, tx execution.Transaction) (int, error) { + // Extract structlog data + structlogs, err := p.ExtractStructlogs(ctx, block, index, tx) + if err != nil { + return 0, err + } + + // Store count before processing + structlogCount := len(structlogs) + + // Ensure we clear the slice on exit to allow GC + defer func() { + // Clear the slice to release memory + structlogs = nil + }() + + // Send for direct insertion + if err := p.insertStructlogs(ctx, structlogs); err != nil { + pcommon.TransactionsProcessed.WithLabelValues(p.network.Name, "structlog", "failed").Inc() + + return 0, fmt.Errorf("failed to insert structlogs: %w", err) + } + + // Record success metrics + pcommon.TransactionsProcessed.WithLabelValues(p.network.Name, "structlog", "success").Inc() + + return structlogCount, nil +} + +// ProcessTransaction processes a transaction and inserts structlogs to ClickHouse. +func (p *Processor) ProcessTransaction(ctx context.Context, block execution.Block, index int, tx execution.Transaction) (int, error) { + // Get trace from execution node trace, err := p.getTransactionTrace(ctx, tx, block) if err != nil { return 0, fmt.Errorf("failed to get trace: %w", err) @@ -27,100 +156,141 @@ func (p *Processor) ProcessTransaction(ctx context.Context, block *types.Block, return 0, nil } - // Compute actual gas used for each structlog - gasUsed := ComputeGasUsed(trace.Structlogs) + // Check if GasUsed is pre-computed by the tracer (embedded mode). + // In embedded mode, skip the post-processing computation. + // In RPC mode, compute GasUsed from gas differences. + precomputedGasUsed := hasPrecomputedGasUsed(trace.Structlogs) - chunkSize := p.config.ChunkSize - if chunkSize == 0 { - chunkSize = tracker.DefaultChunkSize + var gasUsed []uint64 + if !precomputedGasUsed { + gasUsed = ComputeGasUsed(trace.Structlogs) + } else { + // Extract pre-computed GasUsed values from structlogs (embedded mode) + gasUsed = make([]uint64, len(trace.Structlogs)) + for i := range trace.Structlogs { + gasUsed[i] = trace.Structlogs[i].GasUsed + } } - cols := NewColumns() - input := cols.Input() + // Compute self gas (excludes child frame gas for CALL/CREATE opcodes) + gasSelf := ComputeGasSelf(trace.Structlogs, gasUsed) + + // Initialize call frame tracker + callTracker := NewCallTracker() + + // Check if CREATE/CREATE2 addresses are pre-computed by the tracer (embedded mode). + // In embedded mode, skip the multi-pass ComputeCreateAddresses scan. 
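+	// In RPC mode the created address only appears on the caller's stack once each
+	// constructor returns, so ComputeCreateAddresses makes a full pass over the trace
+	// up front before any rows are built.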
+ precomputedCreateAddresses := hasPrecomputedCreateAddresses(trace.Structlogs) + + var createAddresses map[int]*string + if !precomputedCreateAddresses { + // Pre-compute CREATE/CREATE2 addresses from trace stack (RPC mode) + createAddresses = ComputeCreateAddresses(trace.Structlogs) + } - currentIdx := 0 blockNum := block.Number().Uint64() txHash := tx.Hash().String() txIndex := uint32(index) //nolint:gosec // index is bounded by block.Transactions() length now := time.Now() - // Use Do with OnInput for streaming insert - err = p.clickhouse.Do(ctx, ch.Query{ - Body: input.Into(p.config.Table), - Input: input, - OnInput: func(ctx context.Context) error { - // Reset columns for next chunk - cols.Reset() + // Pre-allocate slice for structlogs (estimate with some extra capacity for EOA frames) + structlogs := make([]Structlog, 0, totalCount+totalCount/10) - if currentIdx >= totalCount { - return io.EOF - } + // Build all structlogs + for i := 0; i < totalCount; i++ { + sl := &trace.Structlogs[i] - // Fill columns with next chunk - end := currentIdx + chunkSize - if end > totalCount { - end = totalCount - } + // Track call frame based on depth changes + frameID, framePath := callTracker.ProcessDepthChange(sl.Depth) - for i := currentIdx; i < end; i++ { - sl := &trace.Structlogs[i] - cols.Append( - now, - blockNum, - txHash, - txIndex, - trace.Gas, - trace.Failed, - trace.ReturnValue, - uint32(i), //nolint:gosec // index is bounded by structlogs length - sl.PC, - sl.Op, - sl.Gas, - sl.GasCost, - gasUsed[i], - sl.Depth, - sl.ReturnData, - sl.Refund, - sl.Error, - p.extractCallAddress(sl), - p.network.Name, - ) - - // Free original trace data immediately to help GC - trace.Structlogs[i] = execution.StructLog{} - } + callToAddr := p.extractCallAddressWithCreate(sl, i, createAddresses) + + structlogs = append(structlogs, Structlog{ + UpdatedDateTime: NewClickHouseTime(now), + BlockNumber: blockNum, + TransactionHash: txHash, + TransactionIndex: txIndex, + TransactionGas: trace.Gas, + TransactionFailed: trace.Failed, + TransactionReturnValue: trace.ReturnValue, + Index: uint32(i), //nolint:gosec // index is bounded by structlogs length + Operation: sl.Op, + Gas: sl.Gas, + GasCost: sl.GasCost, + GasUsed: gasUsed[i], + GasSelf: gasSelf[i], + Depth: sl.Depth, + ReturnData: sl.ReturnData, + Refund: sl.Refund, + Error: sl.Error, + CallToAddress: callToAddr, + CallFrameID: frameID, + CallFramePath: framePath, + MetaNetworkName: p.network.Name, + }) + + // Check for EOA call: CALL-type opcode where depth stays the same (immediate return) + // and target is not a precompile (precompiles don't create trace frames) + if isCallOpcode(sl.Op) && callToAddr != nil { + isEOACall := false - // Log progress for large transactions - progressThreshold := p.config.ProgressLogThreshold - if progressThreshold == 0 { - progressThreshold = tracker.DefaultProgressLogThreshold + if i+1 < totalCount { + // Next opcode exists - check if depth stayed the same + nextDepth := trace.Structlogs[i+1].Depth + if nextDepth == sl.Depth && !isPrecompile(*callToAddr) { + isEOACall = true + } } - if totalCount > progressThreshold && end%progressThreshold < chunkSize { - p.log.WithFields(logrus.Fields{ - "tx_hash": txHash, - "progress": fmt.Sprintf("%d/%d", end, totalCount), - }).Debug("Processing large transaction") + if isEOACall { + // Emit synthetic structlog for EOA frame + eoaFrameID, eoaFramePath := callTracker.IssueFrameID() + + structlogs = append(structlogs, Structlog{ + UpdatedDateTime: NewClickHouseTime(now), + BlockNumber: 
blockNum, + TransactionHash: txHash, + TransactionIndex: txIndex, + TransactionGas: trace.Gas, + TransactionFailed: trace.Failed, + TransactionReturnValue: trace.ReturnValue, + Index: uint32(i), //nolint:gosec // Same index as parent CALL + Operation: "", // Empty = synthetic EOA frame + Gas: 0, + GasCost: 0, + GasUsed: 0, + GasSelf: 0, + Depth: sl.Depth + 1, // One level deeper than caller + ReturnData: nil, + Refund: nil, + Error: sl.Error, // Inherit error if CALL failed + CallToAddress: callToAddr, // The EOA address + CallFrameID: eoaFrameID, + CallFramePath: eoaFramePath, + MetaNetworkName: p.network.Name, + }) } + } - currentIdx = end + // Free original trace data immediately to help GC + trace.Structlogs[i] = execution.StructLog{} + } - return nil - }, - }) - if err != nil { + rowCount := len(structlogs) + + // Submit to row buffer for batched insertion + if err := p.insertStructlogs(ctx, structlogs); err != nil { return 0, fmt.Errorf("insert failed: %w", err) } // Record success metrics - common.TransactionsProcessed.WithLabelValues(p.network.Name, "structlog", "success").Inc() - common.ClickHouseInsertsRows.WithLabelValues(p.network.Name, ProcessorName, p.config.Table, "success", "").Add(float64(totalCount)) + pcommon.TransactionsProcessed.WithLabelValues(p.network.Name, "structlog", "success").Inc() - return totalCount, nil + return rowCount, nil } // getTransactionTrace gets the trace for a transaction. -func (p *Processor) getTransactionTrace(ctx context.Context, tx *types.Transaction, block *types.Block) (*execution.TraceTransaction, error) { +func (p *Processor) getTransactionTrace(ctx context.Context, tx execution.Transaction, block execution.Block) (*execution.TraceTransaction, error) { // Get execution node node := p.pool.GetHealthyExecutionNode() if node == nil { @@ -140,13 +310,306 @@ func (p *Processor) getTransactionTrace(ctx context.Context, tx *types.Transacti return trace, nil } -// extractCallAddress extracts the call address from a structlog if it's a CALL operation. +// formatAddress normalizes an address to exactly 42 characters (0x + 40 hex). +// +// Background: The EVM is a 256-bit (32-byte) stack machine. ALL stack values are 32 bytes, +// including addresses. When execution clients like Erigon/Geth return debug traces, the +// stack array contains raw 32-byte values as hex strings (66 chars with 0x prefix). +// +// However, Ethereum addresses are only 160 bits (20 bytes, 40 hex chars). In EVM/ABI encoding, +// addresses are stored in the LOWER 160 bits of the 32-byte word (right-aligned, left-padded +// with zeros). For example, address 0x7a250d5630b4cf539739df2c5dacb4c659f2488d on the stack: +// +// 0x0000000000000000000000007a250d5630b4cf539739df2c5dacb4c659f2488d +// |-------- upper 12 bytes (zeros) --------||---- lower 20 bytes (address) ----| +// +// Some contracts may have non-zero upper bytes in the stack value. The EVM ignores these +// when interpreting the value as an address - only the lower 20 bytes are used. +// +// This function handles three cases: +// 1. Short addresses (e.g., "0x1" for precompiles): left-pad with zeros to 40 hex chars +// 2. Full 32-byte stack values (66 chars): extract rightmost 40 hex chars (lower 160 bits) +// 3. Normal 42-char addresses: return as-is +func formatAddress(addr string) string { + // Remove 0x prefix if present + hex := strings.TrimPrefix(addr, "0x") + + // If longer than 40 chars, extract the lower 20 bytes (rightmost 40 hex chars). + // This handles raw 32-byte stack values from execution client traces. 
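+	// Example: a short precompile reference such as "0x1" becomes
+	// "0x0000000000000000000000000000000000000001".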
+ if len(hex) > 40 { + hex = hex[len(hex)-40:] + } + + // Left-pad with zeros to 40 chars if shorter (handles precompiles like 0x1), + // then add 0x prefix + return fmt.Sprintf("0x%040s", hex) +} + +// extractCallAddress extracts the call address from a structlog for CALL-family opcodes. +// +// Supports two modes for backward compatibility: +// - Embedded mode: CallToAddress is pre-populated by the tracer, use directly. +// - RPC mode: CallToAddress is nil, extract from Stack[len-2] for CALL-family opcodes. +// +// Stack layout in Erigon/Geth debug traces: +// - Array index 0 = bottom of stack (oldest value, first pushed) +// - Array index len-1 = top of stack (newest value, first to be popped) +// +// When a CALL opcode executes, its arguments are at the top of the stack: +// +// CALL/CALLCODE: [..., retSize, retOffset, argsSize, argsOffset, value, addr, gas] +// DELEGATECALL/STATICCALL: [..., retSize, retOffset, argsSize, argsOffset, addr, gas] +// ^ ^ +// len-2 len-1 +// +// The address is always at Stack[len-2] (second from top), regardless of how many +// other values exist below the CALL arguments on the stack. +// +// Note: The stack value is a raw 32-byte word. The formatAddress function extracts +// the actual 20-byte address from the lower 160 bits. func (p *Processor) extractCallAddress(structLog *execution.StructLog) *string { - if structLog.Op == "CALL" && structLog.Stack != nil && len(*structLog.Stack) > 1 { + // Embedded mode: use pre-extracted CallToAddress + if structLog.CallToAddress != nil { + return structLog.CallToAddress + } + + // RPC mode fallback: extract from Stack for CALL-family opcodes + if structLog.Stack == nil || len(*structLog.Stack) < 2 { + return nil + } + + switch structLog.Op { + case "CALL", "CALLCODE", "DELEGATECALL", "STATICCALL": + // Extract the raw 32-byte stack value at the address position (second from top). + // formatAddress will normalize it to a proper 20-byte address. stackValue := (*structLog.Stack)[len(*structLog.Stack)-2] + addr := formatAddress(stackValue) + + return &addr + default: + return nil + } +} + +// extractCallAddressWithCreate extracts the call address, using createAddresses map for CREATE/CREATE2 opcodes. +func (p *Processor) extractCallAddressWithCreate(structLog *execution.StructLog, index int, createAddresses map[int]*string) *string { + // For CREATE/CREATE2, use the pre-computed address from the trace + if structLog.Op == "CREATE" || structLog.Op == "CREATE2" { + if createAddresses != nil { + return createAddresses[index] + } + + return nil + } + + return p.extractCallAddress(structLog) +} + +// ComputeCreateAddresses pre-computes the created contract addresses for all CREATE/CREATE2 opcodes. +// It scans the trace and extracts addresses from the stack when each CREATE's constructor returns. +// The returned map contains opcode index -> created address (only for CREATE/CREATE2 opcodes). +func ComputeCreateAddresses(structlogs []execution.StructLog) map[int]*string { + result := make(map[int]*string) + + // Track pending CREATE operations: (index, depth) + type pendingCreate struct { + index int + depth uint64 + } + + var pending []pendingCreate + + for i, log := range structlogs { + // Resolve pending CREATEs that have completed. + // A CREATE at depth D completes when we see an opcode at depth <= D + // (either immediately if CREATE failed, or after constructor returns). 
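+		// Example: a CREATE at index 10, depth 1 runs its constructor at depth 2; the
+		// first later opcode back at depth 1 carries the created address (or 0 on
+		// failure) on top of its stack, and that value is recorded for index 10.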
+ for len(pending) > 0 { + last := pending[len(pending)-1] + + // If current opcode is at or below CREATE's depth and it's not the CREATE itself + if log.Depth <= last.depth && i > last.index { + // Extract address from top of stack (created address or 0 if failed) + if log.Stack != nil && len(*log.Stack) > 0 { + addr := formatAddress((*log.Stack)[len(*log.Stack)-1]) + result[last.index] = &addr + } + + pending = pending[:len(pending)-1] + } else { + break + } + } + + // Track new CREATE/CREATE2 + if log.Op == "CREATE" || log.Op == "CREATE2" { + pending = append(pending, pendingCreate{index: i, depth: log.Depth}) + } + } + + return result +} + +// ExtractStructlogs extracts structlog data from a transaction without inserting to database. +func (p *Processor) ExtractStructlogs(ctx context.Context, block execution.Block, index int, tx execution.Transaction) ([]Structlog, error) { + start := time.Now() + + defer func() { + duration := time.Since(start) + pcommon.TransactionProcessingDuration.WithLabelValues(p.network.Name, "structlog").Observe(duration.Seconds()) + }() + + // Get execution node + node := p.pool.GetHealthyExecutionNode() + if node == nil { + return nil, fmt.Errorf("no healthy execution node available") + } + + // Process transaction with timeout + processCtx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + + // Get transaction trace + trace, err := node.DebugTraceTransaction(processCtx, tx.Hash().String(), block.Number(), execution.StackTraceOptions()) + if err != nil { + return nil, fmt.Errorf("failed to trace transaction: %w", err) + } + + // Convert trace to structlog rows + var structlogs []Structlog + + uIndex := uint32(index) //nolint:gosec // index is bounded by block.Transactions() length + + if trace != nil { + // Check if GasUsed is pre-computed by the tracer (embedded mode). + // In embedded mode, extract values from structlogs. + // In RPC mode, compute GasUsed from gas differences. + precomputedGasUsed := hasPrecomputedGasUsed(trace.Structlogs) + + var gasUsed []uint64 + if precomputedGasUsed { + // Extract pre-computed GasUsed values from structlogs (embedded mode) + gasUsed = make([]uint64, len(trace.Structlogs)) + for i := range trace.Structlogs { + gasUsed[i] = trace.Structlogs[i].GasUsed + } + } else { + // Compute GasUsed from gas differences (RPC mode) + gasUsed = ComputeGasUsed(trace.Structlogs) + } + + // Compute self gas (excludes child frame gas for CALL/CREATE opcodes) + gasSelf := ComputeGasSelf(trace.Structlogs, gasUsed) + + // Initialize call frame tracker + callTracker := NewCallTracker() + + // Check if CREATE/CREATE2 addresses are pre-computed by the tracer (embedded mode). + precomputedCreateAddresses := hasPrecomputedCreateAddresses(trace.Structlogs) + + var createAddresses map[int]*string + if !precomputedCreateAddresses { + // Pre-compute CREATE/CREATE2 addresses from trace stack (RPC mode) + createAddresses = ComputeCreateAddresses(trace.Structlogs) + } + + // Pre-allocate slice for better memory efficiency + structlogs = make([]Structlog, 0, len(trace.Structlogs)) + + for i, structLog := range trace.Structlogs { + // Track call frame based on depth changes + frameID, framePath := callTracker.ProcessDepthChange(structLog.Depth) + + callToAddr := p.extractCallAddressWithCreate(&structLog, i, createAddresses) + + // Get GasUsed: use pre-computed value from tracer (embedded) or computed value (RPC). 
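+			// (In embedded mode gasUsed[i] was copied from structLog.GasUsed above, so
+			// both branches yield the same value.)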
+ var gasUsedValue uint64 + if precomputedGasUsed { + gasUsedValue = structLog.GasUsed + } else { + gasUsedValue = gasUsed[i] + } + + row := Structlog{ + UpdatedDateTime: NewClickHouseTime(time.Now()), + BlockNumber: block.Number().Uint64(), + TransactionHash: tx.Hash().String(), + TransactionIndex: uIndex, + TransactionGas: trace.Gas, + TransactionFailed: trace.Failed, + TransactionReturnValue: trace.ReturnValue, + Index: uint32(i), //nolint:gosec // index is bounded by structlogs length + Operation: structLog.Op, + Gas: structLog.Gas, + GasCost: structLog.GasCost, + GasUsed: gasUsedValue, + GasSelf: gasSelf[i], + Depth: structLog.Depth, + ReturnData: structLog.ReturnData, + Refund: structLog.Refund, + Error: structLog.Error, + CallToAddress: callToAddr, + CallFrameID: frameID, + CallFramePath: framePath, + MetaNetworkName: p.network.Name, + } + + structlogs = append(structlogs, row) + + // Check for EOA call: CALL-type opcode where depth stays the same (immediate return) + // and target is not a precompile (precompiles don't create trace frames) + if isCallOpcode(structLog.Op) && callToAddr != nil { + isEOACall := false + + if i+1 < len(trace.Structlogs) { + // Next opcode exists - check if depth stayed the same + // Depth increase = entered contract code (not EOA) + // Depth decrease = call returned/failed (not EOA) + // Depth same = called EOA or precompile (immediate return) + nextDepth := trace.Structlogs[i+1].Depth + if nextDepth == structLog.Depth && !isPrecompile(*callToAddr) { + isEOACall = true + } + } + // Note: If last opcode is a CALL, we can't determine if it's EOA + // because we don't have a next opcode to compare depth with. + // These are typically failed calls at end of execution. + + if isEOACall { + // Emit synthetic structlog for EOA frame + eoaFrameID, eoaFramePath := callTracker.IssueFrameID() + + eoaRow := Structlog{ + UpdatedDateTime: NewClickHouseTime(time.Now()), + BlockNumber: block.Number().Uint64(), + TransactionHash: tx.Hash().String(), + TransactionIndex: uIndex, + TransactionGas: trace.Gas, + TransactionFailed: trace.Failed, + TransactionReturnValue: trace.ReturnValue, + Index: uint32(i), //nolint:gosec // Same index as parent CALL + Operation: "", // Empty = synthetic EOA frame + Gas: 0, + GasCost: 0, + GasUsed: 0, + GasSelf: 0, + Depth: structLog.Depth + 1, // One level deeper than caller + ReturnData: nil, + Refund: nil, + Error: structLog.Error, // Inherit error if CALL failed + CallToAddress: callToAddr, // The EOA address + CallFrameID: eoaFrameID, + CallFramePath: eoaFramePath, + MetaNetworkName: p.network.Name, + } + + structlogs = append(structlogs, eoaRow) + } + } + } - return &stackValue + // Clear the original trace data to free memory + trace.Structlogs = nil } - return nil + return structlogs, nil } diff --git a/pkg/processor/transaction/structlog_agg/aggregator.go b/pkg/processor/transaction/structlog_agg/aggregator.go new file mode 100644 index 0000000..d50ff1f --- /dev/null +++ b/pkg/processor/transaction/structlog_agg/aggregator.go @@ -0,0 +1,414 @@ +package structlog_agg + +import ( + "github.com/ethpandaops/execution-processor/pkg/ethereum/execution" +) + +// CallFrameRow represents aggregated data for a single call frame or per-opcode aggregation. +// This is the output format that gets inserted into ClickHouse. +// Two types of rows: +// - Summary row: Operation="" contains frame-level metadata (call_type, target_address, etc.) +// - Per-opcode row: Operation="SSTORE" etc. 
contains gas/count for that specific opcode +type CallFrameRow struct { + CallFrameID uint32 + ParentCallFrameID *uint32 // nil for root frame + CallFramePath []uint32 // Path from root to this frame + Depth uint32 + TargetAddress *string + CallType string // CALL/DELEGATECALL/STATICCALL/CALLCODE/CREATE/CREATE2 (empty for root) + Operation string // Empty for summary row, opcode name for per-opcode rows + OpcodeCount uint64 + ErrorCount uint64 + Gas uint64 // SUM(gas_self) - excludes child frame gas + GasCumulative uint64 // For summary: frame gas_cumulative; for per-opcode: SUM(gas_used) + MinDepth uint32 // Per-opcode: MIN(depth); summary: same as Depth + MaxDepth uint32 // Per-opcode: MAX(depth); summary: same as Depth + GasRefund *uint64 // Root frame only (max refund from trace) + IntrinsicGas *uint64 // Root frame only (computed) +} + +// OpcodeStats tracks gas and count for a specific opcode within a frame. +type OpcodeStats struct { + Count uint64 + Gas uint64 // SUM(gas_self) - excludes child frame gas + GasCumulative uint64 // SUM(gas_used) - includes child frame gas for CALL/CREATE + ErrorCount uint64 + MinDepth uint32 + MaxDepth uint32 +} + +// FrameAccumulator tracks data for a single frame during processing. +type FrameAccumulator struct { + CallFrameID uint32 + CallFramePath []uint32 // Path from root to this frame + FirstOpcodeIndex uint32 + FirstGas uint64 // Gas at first opcode + LastGas uint64 // Gas at last opcode + LastGasUsed uint64 // GasUsed of last opcode + OpcodeCount uint64 + ErrorCount uint64 + MaxRefund uint64 + TargetAddress *string + CallType string + Depth uint32 + + // Per-opcode tracking + OpcodeStats map[string]*OpcodeStats // opcode -> stats +} + +// FrameAggregator aggregates structlog data into call frame rows. +type FrameAggregator struct { + frames map[uint32]*FrameAccumulator // frameID -> accumulator + frameList []uint32 // Ordered list of frame IDs for deterministic output +} + +// NewFrameAggregator creates a new FrameAggregator. +func NewFrameAggregator() *FrameAggregator { + return &FrameAggregator{ + frames: make(map[uint32]*FrameAccumulator, 16), + frameList: make([]uint32, 0, 16), + } +} + +// ProcessStructlog processes a single structlog entry and updates frame accumulators. 
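+// It is expected to be called once per structlog, in trace order (including synthetic
+// EOA rows with Op == ""); frame accumulators are created lazily the first time a
+// frame ID is seen.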
+// Parameters: +// - sl: The structlog entry +// - index: Index of this structlog in the trace +// - frameID: The call frame ID for this structlog +// - framePath: The path from root to current frame +// - gasUsed: Pre-computed gas used for this opcode (includes child frame gas for CALL/CREATE) +// - gasSelf: Pre-computed gas used excluding child frame gas +// - callToAddr: Target address for CALL/CREATE opcodes (nil otherwise) +// - prevStructlog: Previous structlog (for detecting frame entry via CALL/CREATE) +func (fa *FrameAggregator) ProcessStructlog( + sl *execution.StructLog, + index int, + frameID uint32, + framePath []uint32, + gasUsed uint64, + gasSelf uint64, + callToAddr *string, + prevStructlog *execution.StructLog, +) { + acc, exists := fa.frames[frameID] + if !exists { + // New frame - initialize accumulator + acc = &FrameAccumulator{ + CallFrameID: frameID, + CallFramePath: framePath, + FirstOpcodeIndex: uint32(index), //nolint:gosec // index is bounded + FirstGas: sl.Gas, + Depth: uint32(sl.Depth), //nolint:gosec // depth is bounded by EVM + OpcodeStats: make(map[string]*OpcodeStats), + } + + // Determine call type and target address from the initiating opcode (previous structlog) + if frameID == 0 { + // Root frame - no initiating CALL opcode, use empty string + acc.CallType = "" + } else if prevStructlog != nil { + // Frame was entered via the previous opcode + acc.CallType = mapOpcodeToCallType(prevStructlog.Op) + + // Get target address from the CALL/CREATE opcode that initiated this frame + // This is either from prevStructlog.CallToAddress or passed in via callToAddr + if prevStructlog.CallToAddress != nil { + acc.TargetAddress = prevStructlog.CallToAddress + } + } + + fa.frames[frameID] = acc + fa.frameList = append(fa.frameList, frameID) + } + + // Update accumulator with this opcode's data + // Only count real opcodes, not synthetic EOA rows (operation = '') + if sl.Op != "" { + acc.OpcodeCount++ + + depth := uint32(sl.Depth) //nolint:gosec // depth is bounded by EVM + + // Track per-opcode stats + stats, ok := acc.OpcodeStats[sl.Op] + if !ok { + stats = &OpcodeStats{ + MinDepth: depth, + MaxDepth: depth, + } + acc.OpcodeStats[sl.Op] = stats + } + + stats.Count++ + stats.Gas += gasSelf // SUM(gas_self) - excludes child frame gas + stats.GasCumulative += gasUsed // SUM(gas_used) - includes child frame gas + + // Track min/max depth + if depth < stats.MinDepth { + stats.MinDepth = depth + } + + if depth > stats.MaxDepth { + stats.MaxDepth = depth + } + + // Track errors per opcode + if sl.Error != nil && *sl.Error != "" { + stats.ErrorCount++ + } + } + + acc.LastGas = sl.Gas + acc.LastGasUsed = gasUsed + + // Track errors (for frame total) + if sl.Error != nil && *sl.Error != "" { + acc.ErrorCount++ + } + + // Track max refund + if sl.Refund != nil && *sl.Refund > acc.MaxRefund { + acc.MaxRefund = *sl.Refund + } + + // If this is an empty operation (synthetic EOA frame), capture the target address + // Note: CallType is already set from the initiating CALL opcode (prevStructlog) + if sl.Op == "" && callToAddr != nil { + acc.TargetAddress = callToAddr + } +} + +// Finalize computes final call frame rows from the accumulated data. +// Returns the call frame rows ready for insertion. +// Emits two types of rows per frame: +// - Summary row: Operation="" with frame-level metadata and totals +// - Per-opcode rows: Operation="SSTORE" etc. 
with gas/count for that opcode +func (fa *FrameAggregator) Finalize(trace *execution.TraceTransaction, receiptGas uint64) []CallFrameRow { + if len(fa.frames) == 0 { + return nil + } + + // Estimate capacity: 1 summary row per frame + average ~10 unique opcodes per frame + rows := make([]CallFrameRow, 0, len(fa.frames)*11) + + // First pass: compute gas_cumulative for each frame + gasCumulative := make(map[uint32]uint64, len(fa.frames)) + for _, frameID := range fa.frameList { + acc := fa.frames[frameID] + // gas_cumulative = first_gas - last_gas + last_gas_used + // This accounts for all gas consumed within this frame and its children + if acc.FirstGas >= acc.LastGas { + gasCumulative[frameID] = acc.FirstGas - acc.LastGas + acc.LastGasUsed + } else { + // Edge case: shouldn't happen in valid traces + gasCumulative[frameID] = acc.LastGasUsed + } + } + + // Second pass: compute gas (self) for each frame and emit rows + for _, frameID := range fa.frameList { + acc := fa.frames[frameID] + + // Find direct children of this frame + var childGasSum uint64 + + for _, otherFrameID := range fa.frameList { + otherAcc := fa.frames[otherFrameID] + if len(otherAcc.CallFramePath) == len(acc.CallFramePath)+1 { + // Check if this frame is the parent + if isParentOf(acc.CallFramePath, otherAcc.CallFramePath) { + childGasSum += gasCumulative[otherFrameID] + } + } + } + + // gas = gas_cumulative - sum(children.gas_cumulative) + gasSum := gasCumulative[frameID] + + var gasSelf uint64 + + if gasSum >= childGasSum { + gasSelf = gasSum - childGasSum + } + + // Determine parent frame ID + var parentFrameID *uint32 + + if len(acc.CallFramePath) >= 2 { + parent := acc.CallFramePath[len(acc.CallFramePath)-2] + parentFrameID = &parent + } + + depth := acc.Depth - 1 // Convert from EVM depth (1-based) to 0-based + if frameID == 0 { + depth = 0 // Ensure root is depth 0 + } + + // Emit summary row (Operation="") + summaryRow := CallFrameRow{ + CallFrameID: frameID, + ParentCallFrameID: parentFrameID, + CallFramePath: acc.CallFramePath, + Depth: depth, + TargetAddress: acc.TargetAddress, + CallType: acc.CallType, + Operation: "", // Empty for summary row + OpcodeCount: acc.OpcodeCount, + ErrorCount: acc.ErrorCount, + Gas: gasSelf, + GasCumulative: gasCumulative[frameID], + MinDepth: depth, // For summary row, min/max depth = frame depth + MaxDepth: depth, + } + + // Root frame: compute gas refund and intrinsic gas + if frameID == 0 { + // Check trace.Failed to detect transaction failure from REVERT opcodes. + // REVERT executes successfully (no opcode error), but causes transaction failure. + // Individual opcode errors (like "out of gas") are already counted via acc.ErrorCount. + if trace.Failed && summaryRow.ErrorCount == 0 { + summaryRow.ErrorCount = 1 + } + + // For successful transactions, refunds are applied to the final gas calculation. + // For failed transactions, refunds are accumulated during execution but NOT applied + // (all gas up to failure is consumed), so we set refund to nil. + if summaryRow.ErrorCount == 0 { + summaryRow.GasRefund = &acc.MaxRefund + } + + // Intrinsic gas is ALWAYS charged (before EVM execution begins), regardless of + // whether the transaction succeeds or fails. For failed txs, use refund=0 in + // the formula since refunds aren't applied. 
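+			// Worked example (uncapped case, hypothetical numbers): receipt_gas=100000,
+			// gas_cumulative=80000, refund=10000 -> intrinsic = 100000 - 80000 + 10000 = 30000.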
+ refundForCalc := acc.MaxRefund + if summaryRow.ErrorCount > 0 { + refundForCalc = 0 + } + + intrinsicGas := computeIntrinsicGas(gasCumulative[0], refundForCalc, receiptGas) + if intrinsicGas > 0 { + summaryRow.IntrinsicGas = &intrinsicGas + } + } + + rows = append(rows, summaryRow) + + // Emit per-opcode rows + for opcode, stats := range acc.OpcodeStats { + opcodeRow := CallFrameRow{ + CallFrameID: frameID, + ParentCallFrameID: parentFrameID, + CallFramePath: acc.CallFramePath, + Depth: depth, + TargetAddress: acc.TargetAddress, + CallType: acc.CallType, + Operation: opcode, + OpcodeCount: stats.Count, + ErrorCount: stats.ErrorCount, + Gas: stats.Gas, + GasCumulative: stats.GasCumulative, // SUM(gas_used) for per-opcode rows + MinDepth: stats.MinDepth, + MaxDepth: stats.MaxDepth, + GasRefund: nil, + IntrinsicGas: nil, + } + rows = append(rows, opcodeRow) + } + } + + return rows +} + +// isParentOf checks if parentPath is the direct parent of childPath. +func isParentOf(parentPath, childPath []uint32) bool { + if len(childPath) != len(parentPath)+1 { + return false + } + + for i := range parentPath { + if parentPath[i] != childPath[i] { + return false + } + } + + return true +} + +// mapOpcodeToCallType maps an opcode to a call type string. +func mapOpcodeToCallType(op string) string { + switch op { + case "CALL": + return "CALL" + case "CALLCODE": + return "CALLCODE" + case "DELEGATECALL": + return "DELEGATECALL" + case "STATICCALL": + return "STATICCALL" + case "CREATE": + return "CREATE" + case "CREATE2": + return "CREATE2" + default: + return "UNKNOWN" + } +} + +// computeIntrinsicGas computes the intrinsic gas for a transaction. +// This is the gas consumed before EVM execution begins (21000 base + calldata costs). +// +// Formula from int_transaction_call_frame.sql: +// +// IF gas_refund >= receipt_gas / 4 THEN +// intrinsic = receipt_gas * 5 / 4 - gas_cumulative (refund was capped) +// ELSE +// intrinsic = receipt_gas - gas_cumulative + gas_refund (uncapped) +func computeIntrinsicGas(gasCumulative, gasRefund, receiptGas uint64) uint64 { + if receiptGas == 0 { + return 0 + } + + var intrinsic uint64 + + if gasRefund >= receiptGas/4 { + // Capped case: refund was limited to receipt_gas/4 + // Actual refund applied = receipt_gas/4 + // So: receipt_gas = intrinsic + gas_cumulative - receipt_gas/4 + // => intrinsic = receipt_gas + receipt_gas/4 - gas_cumulative + // => intrinsic = receipt_gas * 5/4 - gas_cumulative + cappedValue := receiptGas * 5 / 4 + if cappedValue >= gasCumulative { + intrinsic = cappedValue - gasCumulative + } + } else { + // Uncapped case: full refund was applied + // receipt_gas = intrinsic + gas_cumulative - gas_refund + // => intrinsic = receipt_gas - gas_cumulative + gas_refund + // + // IMPORTANT: We must avoid underflow when receiptGas < gasCumulative. + // The guard condition (receiptGas + gasRefund >= gasCumulative) can pass + // even when receiptGas < gasCumulative, so we reorder the arithmetic + // to ensure we never subtract a larger value from a smaller one. + if receiptGas+gasRefund >= gasCumulative { + // Reorder to: (receiptGas + gasRefund) - gasCumulative + // This is safe because the guard ensures the sum >= gasCumulative + intrinsic = (receiptGas + gasRefund) - gasCumulative + } + } + + return intrinsic +} + +// SetRootTargetAddress sets the target address for the root frame (frame ID 0). 
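+// If no structlogs were processed (frame 0 does not exist), the call is a no-op.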
+// This should be called after processing all structlogs, as the root frame's +// target address comes from the transaction's to_address, not from an initiating CALL. +func (fa *FrameAggregator) SetRootTargetAddress(addr *string) { + if acc, exists := fa.frames[0]; exists { + acc.TargetAddress = addr + } +} + +// Reset clears the aggregator for reuse. +func (fa *FrameAggregator) Reset() { + clear(fa.frames) + fa.frameList = fa.frameList[:0] +} diff --git a/pkg/processor/transaction/structlog_agg/aggregator_test.go b/pkg/processor/transaction/structlog_agg/aggregator_test.go new file mode 100644 index 0000000..3cacc82 --- /dev/null +++ b/pkg/processor/transaction/structlog_agg/aggregator_test.go @@ -0,0 +1,647 @@ +package structlog_agg + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/ethpandaops/execution-processor/pkg/ethereum/execution" +) + +// getSummaryRow returns the summary row (Operation == "") for a given frame ID. +func getSummaryRow(rows []CallFrameRow, frameID uint32) *CallFrameRow { + for i := range rows { + if rows[i].Operation == "" && rows[i].CallFrameID == frameID { + return &rows[i] + } + } + + return nil +} + +// getOpcodeRow returns the per-opcode row for a given frame ID and operation. +func getOpcodeRow(rows []CallFrameRow, frameID uint32, operation string) *CallFrameRow { + for i := range rows { + if rows[i].Operation == operation && rows[i].CallFrameID == frameID { + return &rows[i] + } + } + + return nil +} + +// countSummaryRows counts the number of summary rows (Operation == ""). +func countSummaryRows(rows []CallFrameRow) int { + count := 0 + + for _, row := range rows { + if row.Operation == "" { + count++ + } + } + + return count +} + +func TestFrameAggregator_SingleFrame(t *testing.T) { + aggregator := NewFrameAggregator() + + // Simulate a simple transaction with only root frame + structlogs := []struct { + op string + depth uint64 + gas uint64 + gasUsed uint64 + refund *uint64 + errStr *string + }{ + {"PUSH1", 1, 1000, 3, nil, nil}, + {"PUSH1", 1, 997, 3, nil, nil}, + {"ADD", 1, 994, 3, nil, nil}, + {"STOP", 1, 991, 0, nil, nil}, + } + + framePath := []uint32{0} + + for i, sl := range structlogs { + execSl := &execution.StructLog{ + Op: sl.op, + Depth: sl.depth, + Gas: sl.gas, + } + + var prevSl *execution.StructLog + if i > 0 { + prevSl = &execution.StructLog{ + Op: structlogs[i-1].op, + Depth: structlogs[i-1].depth, + } + } + + // For simple opcodes, gasSelf == gasUsed + aggregator.ProcessStructlog(execSl, i, 0, framePath, sl.gasUsed, sl.gasUsed, nil, prevSl) + } + + trace := &execution.TraceTransaction{ + Gas: 1000, + Failed: false, + } + + frames := aggregator.Finalize(trace, 100) + + // Should have 1 summary row + 3 per-opcode rows (PUSH1, ADD, STOP) + assert.Equal(t, 1, countSummaryRows(frames)) + + // Check summary row + summaryRow := getSummaryRow(frames, 0) + require.NotNil(t, summaryRow) + assert.Equal(t, uint32(0), summaryRow.CallFrameID) + assert.Nil(t, summaryRow.ParentCallFrameID) + assert.Equal(t, uint32(0), summaryRow.Depth) + assert.Equal(t, uint64(4), summaryRow.OpcodeCount) + assert.Equal(t, uint64(0), summaryRow.ErrorCount) + assert.Equal(t, "", summaryRow.CallType) + assert.Equal(t, "", summaryRow.Operation) + + // Check per-opcode rows + push1Row := getOpcodeRow(frames, 0, "PUSH1") + require.NotNil(t, push1Row) + assert.Equal(t, uint64(2), push1Row.OpcodeCount) // 2x PUSH1 + + addRow := getOpcodeRow(frames, 0, "ADD") + require.NotNil(t, addRow) + assert.Equal(t, 
uint64(1), addRow.OpcodeCount) +} + +func TestFrameAggregator_NestedCalls(t *testing.T) { + aggregator := NewFrameAggregator() + + // Simulate transaction with nested CALL + // Root frame (depth 1) -> CALL -> Child frame (depth 2) + + // Frame 0 (root) - depth 1 + aggregator.ProcessStructlog(&execution.StructLog{ + Op: "PUSH1", + Depth: 1, + Gas: 10000, + }, 0, 0, []uint32{0}, 3, 3, nil, nil) + + // CALL opcode: gasUsed includes child gas, gasSelf is just the CALL overhead + aggregator.ProcessStructlog(&execution.StructLog{ + Op: "CALL", + Depth: 1, + Gas: 9997, + }, 1, 0, []uint32{0}, 5000, 100, nil, &execution.StructLog{Op: "PUSH1", Depth: 1}) + + // Frame 1 (child) - depth 2 + callAddr := "0x1234567890123456789012345678901234567890" + + aggregator.ProcessStructlog(&execution.StructLog{ + Op: "PUSH1", + Depth: 2, + Gas: 5000, + }, 2, 1, []uint32{0, 1}, 3, 3, &callAddr, &execution.StructLog{Op: "CALL", Depth: 1}) + + aggregator.ProcessStructlog(&execution.StructLog{ + Op: "RETURN", + Depth: 2, + Gas: 4997, + }, 3, 1, []uint32{0, 1}, 0, 0, nil, &execution.StructLog{Op: "PUSH1", Depth: 2}) + + // Back to root frame + aggregator.ProcessStructlog(&execution.StructLog{ + Op: "STOP", + Depth: 1, + Gas: 4997, + }, 4, 0, []uint32{0}, 0, 0, nil, &execution.StructLog{Op: "RETURN", Depth: 2}) + + trace := &execution.TraceTransaction{ + Gas: 10000, + Failed: false, + } + + frames := aggregator.Finalize(trace, 500) + + // Should have 2 summary rows (root + child) + per-opcode rows + assert.Equal(t, 2, countSummaryRows(frames)) + + // Get summary rows for root and child frames + rootFrame := getSummaryRow(frames, 0) + childFrame := getSummaryRow(frames, 1) + + require.NotNil(t, rootFrame, "root frame should exist") + require.NotNil(t, childFrame, "child frame should exist") + + // Verify root frame + assert.Equal(t, uint32(0), rootFrame.CallFrameID) + assert.Nil(t, rootFrame.ParentCallFrameID) + assert.Equal(t, uint32(0), rootFrame.Depth) + assert.Equal(t, uint64(3), rootFrame.OpcodeCount) // PUSH1, CALL, STOP + assert.Equal(t, "", rootFrame.CallType) + + // Verify child frame + assert.Equal(t, uint32(1), childFrame.CallFrameID) + require.NotNil(t, childFrame.ParentCallFrameID) + assert.Equal(t, uint32(0), *childFrame.ParentCallFrameID) + assert.Equal(t, uint32(1), childFrame.Depth) // 0-based depth + assert.Equal(t, uint64(2), childFrame.OpcodeCount) // PUSH1, RETURN + assert.Equal(t, "CALL", childFrame.CallType) +} + +func TestFrameAggregator_ErrorCounting(t *testing.T) { + aggregator := NewFrameAggregator() + + errMsg := "execution reverted" + + aggregator.ProcessStructlog(&execution.StructLog{ + Op: "PUSH1", + Depth: 1, + Gas: 1000, + }, 0, 0, []uint32{0}, 3, 3, nil, nil) + + aggregator.ProcessStructlog(&execution.StructLog{ + Op: "REVERT", + Depth: 1, + Gas: 997, + Error: &errMsg, + }, 1, 0, []uint32{0}, 0, 0, nil, &execution.StructLog{Op: "PUSH1", Depth: 1}) + + trace := &execution.TraceTransaction{ + Gas: 1000, + Failed: true, + } + + frames := aggregator.Finalize(trace, 100) + + assert.Equal(t, 1, countSummaryRows(frames)) + + summaryRow := getSummaryRow(frames, 0) + require.NotNil(t, summaryRow) + assert.Equal(t, uint64(1), summaryRow.ErrorCount) + + // Check that REVERT opcode row also has error count + revertRow := getOpcodeRow(frames, 0, "REVERT") + require.NotNil(t, revertRow) + assert.Equal(t, uint64(1), revertRow.ErrorCount) +} + +func TestComputeIntrinsicGas_Uncapped(t *testing.T) { + // Test uncapped case: refund < receipt_gas/4 + gasCumulative := uint64(80000) + gasRefund := 
uint64(10000) + receiptGas := uint64(100000) + + intrinsic := computeIntrinsicGas(gasCumulative, gasRefund, receiptGas) + + // intrinsic = receipt_gas - gas_cumulative + gas_refund + // intrinsic = 100000 - 80000 + 10000 = 30000 + assert.Equal(t, uint64(30000), intrinsic) +} + +func TestComputeIntrinsicGas_Capped(t *testing.T) { + // Test capped case: refund >= receipt_gas/4 + gasCumulative := uint64(80000) + gasRefund := uint64(30000) // >= 100000/4 = 25000 + receiptGas := uint64(100000) + + intrinsic := computeIntrinsicGas(gasCumulative, gasRefund, receiptGas) + + // intrinsic = receipt_gas * 5/4 - gas_cumulative + // intrinsic = 125000 - 80000 = 45000 + assert.Equal(t, uint64(45000), intrinsic) +} + +func TestComputeIntrinsicGas_ZeroReceipt(t *testing.T) { + intrinsic := computeIntrinsicGas(1000, 100, 0) + assert.Equal(t, uint64(0), intrinsic) +} + +func TestComputeIntrinsicGas_NoUnderflow_WhenReceiptLessThanCumulative(t *testing.T) { + // This test verifies the fix for the underflow bug in the UNCAPPED path. + // When receiptGas < gasCumulative but receiptGas + gasRefund >= gasCumulative, + // the old code would underflow: receiptGas - gasCumulative + gasRefund + // The fix reorders to: (receiptGas + gasRefund) - gasCumulative + // + // To hit the uncapped path, we need: gasRefund < receiptGas/4 + // + // Example that triggers the bug in unfixed code: + // receiptGas = 100,000 + // gasCumulative = 110,000 + // gasRefund = 20,000 (< 25,000 = receiptGas/4, so UNCAPPED) + // Guard: 100,000 + 20,000 >= 110,000 ✓ (120,000 >= 110,000) + // Old calc: 100,000 - 110,000 = UNDERFLOW! + // Fixed calc: (100,000 + 20,000) - 110,000 = 10,000 + gasCumulative := uint64(110000) + gasRefund := uint64(20000) // < 100000/4 = 25000, so uncapped + receiptGas := uint64(100000) + + intrinsic := computeIntrinsicGas(gasCumulative, gasRefund, receiptGas) + + // Expected: (100000 + 20000) - 110000 = 10000 + assert.Equal(t, uint64(10000), intrinsic) + + // Verify it's NOT a huge underflow value + assert.Less(t, intrinsic, uint64(1000000), "intrinsic gas should be reasonable, not an underflow") +} + +func TestComputeIntrinsicGas_NoUnderflow_EdgeCase(t *testing.T) { + // Edge case: receiptGas + gasRefund == gasCumulative exactly (uncapped path) + // gasRefund must be < receiptGas/4 to hit uncapped path + gasCumulative := uint64(120000) + gasRefund := uint64(20000) // < 100000/4 = 25000, so uncapped + receiptGas := uint64(100000) + + intrinsic := computeIntrinsicGas(gasCumulative, gasRefund, receiptGas) + + // Expected: (100000 + 20000) - 120000 = 0 + assert.Equal(t, uint64(0), intrinsic) +} + +func TestComputeIntrinsicGas_NoUnderflow_ReceiptExceedsCumulative(t *testing.T) { + // Normal case: receiptGas >= gasCumulative (no underflow risk) + gasCumulative := uint64(80000) + gasRefund := uint64(10000) + receiptGas := uint64(100000) + + intrinsic := computeIntrinsicGas(gasCumulative, gasRefund, receiptGas) + + // Expected: (100000 + 10000) - 80000 = 30000 + // This matches the old formula: 100000 - 80000 + 10000 = 30000 + assert.Equal(t, uint64(30000), intrinsic) +} + +func TestComputeIntrinsicGas_GuardPreventsNegativeResult(t *testing.T) { + // When receiptGas + gasRefund < gasCumulative, guard prevents computation + gasCumulative := uint64(300000) + gasRefund := uint64(50000) + receiptGas := uint64(100000) + + intrinsic := computeIntrinsicGas(gasCumulative, gasRefund, receiptGas) + + // Guard: 100000 + 50000 >= 300000? 
No (150000 < 300000) + // So intrinsic stays 0 + assert.Equal(t, uint64(0), intrinsic) +} + +func TestIsParentOf(t *testing.T) { + tests := []struct { + name string + parentPath []uint32 + childPath []uint32 + expected bool + }{ + { + name: "direct parent", + parentPath: []uint32{0}, + childPath: []uint32{0, 1}, + expected: true, + }, + { + name: "nested parent", + parentPath: []uint32{0, 1}, + childPath: []uint32{0, 1, 2}, + expected: true, + }, + { + name: "not a parent - same length", + parentPath: []uint32{0, 1}, + childPath: []uint32{0, 2}, + expected: false, + }, + { + name: "not a parent - grandchild", + parentPath: []uint32{0}, + childPath: []uint32{0, 1, 2}, + expected: false, + }, + { + name: "not a parent - different path", + parentPath: []uint32{0, 1}, + childPath: []uint32{0, 2, 3}, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := isParentOf(tt.parentPath, tt.childPath) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestFrameAggregator_EOAFrame(t *testing.T) { + // Test that EOA frames (synthetic rows with operation="") have opcode_count=0 + aggregator := NewFrameAggregator() + + // Root frame with CALL to EOA + aggregator.ProcessStructlog(&execution.StructLog{ + Op: "PUSH1", + Depth: 1, + Gas: 10000, + }, 0, 0, []uint32{0}, 3, 3, nil, nil) + + aggregator.ProcessStructlog(&execution.StructLog{ + Op: "CALL", + Depth: 1, + Gas: 9997, + }, 1, 0, []uint32{0}, 100, 100, nil, &execution.StructLog{Op: "PUSH1", Depth: 1}) + + // Synthetic EOA frame (operation = "", depth = 2) + eoaAddr := "0xEOAEOAEOAEOAEOAEOAEOAEOAEOAEOAEOAEOAEOAE" + + aggregator.ProcessStructlog(&execution.StructLog{ + Op: "", // Empty = synthetic EOA row + Depth: 2, + Gas: 0, + }, 1, 1, []uint32{0, 1}, 0, 0, &eoaAddr, &execution.StructLog{Op: "CALL", Depth: 1}) + + // Back to root frame + aggregator.ProcessStructlog(&execution.StructLog{ + Op: "STOP", + Depth: 1, + Gas: 9897, + }, 2, 0, []uint32{0}, 0, 0, nil, &execution.StructLog{Op: "", Depth: 2}) + + trace := &execution.TraceTransaction{ + Gas: 10000, + Failed: false, + } + + frames := aggregator.Finalize(trace, 500) + + // Should have 2 summary rows (root + EOA) + assert.Equal(t, 2, countSummaryRows(frames)) + + // Get summary rows for root and EOA frames + rootFrame := getSummaryRow(frames, 0) + eoaFrame := getSummaryRow(frames, 1) + + require.NotNil(t, rootFrame, "root frame should exist") + require.NotNil(t, eoaFrame, "EOA frame should exist") + + // Root frame: 3 real opcodes (PUSH1, CALL, STOP) + assert.Equal(t, uint64(3), rootFrame.OpcodeCount) + + // EOA frame: 0 opcodes (only synthetic row with op="") + assert.Equal(t, uint64(0), eoaFrame.OpcodeCount) + assert.Equal(t, "CALL", eoaFrame.CallType) + require.NotNil(t, eoaFrame.TargetAddress) + assert.Equal(t, eoaAddr, *eoaFrame.TargetAddress) +} + +func TestFrameAggregator_SetRootTargetAddress(t *testing.T) { + aggregator := NewFrameAggregator() + + // Process a simple root frame + aggregator.ProcessStructlog(&execution.StructLog{ + Op: "STOP", + Depth: 1, + Gas: 1000, + }, 0, 0, []uint32{0}, 0, 0, nil, nil) + + // Set root target address (simulating tx.To()) + rootAddr := "0x1234567890123456789012345678901234567890" + aggregator.SetRootTargetAddress(&rootAddr) + + trace := &execution.TraceTransaction{ + Gas: 1000, + Failed: false, + } + + frames := aggregator.Finalize(trace, 100) + + assert.Equal(t, 1, countSummaryRows(frames)) + + summaryRow := getSummaryRow(frames, 0) + require.NotNil(t, summaryRow) + require.NotNil(t, 
summaryRow.TargetAddress) + assert.Equal(t, rootAddr, *summaryRow.TargetAddress) +} + +func TestFrameAggregator_FailedTransaction_NoRefundButHasIntrinsic(t *testing.T) { + // Test that failed transactions do NOT have gas_refund but DO have intrinsic_gas. + // Intrinsic gas is ALWAYS charged (before EVM execution begins). + // For failed transactions, refunds are accumulated during execution but NOT applied + // to the final gas calculation. + aggregator := NewFrameAggregator() + + errMsg := "execution reverted" + refundValue := uint64(4800) // Refund accumulated during execution + + // Simulate a transaction that accumulates refunds but then fails + aggregator.ProcessStructlog(&execution.StructLog{ + Op: "PUSH1", + Depth: 1, + Gas: 80000, + }, 0, 0, []uint32{0}, 3, 3, nil, nil) + + // SSTORE that generates a refund + aggregator.ProcessStructlog(&execution.StructLog{ + Op: "SSTORE", + Depth: 1, + Gas: 79997, + Refund: &refundValue, // Refund accumulated + }, 1, 0, []uint32{0}, 20000, 20000, nil, &execution.StructLog{Op: "PUSH1", Depth: 1}) + + // Transaction fails with REVERT + aggregator.ProcessStructlog(&execution.StructLog{ + Op: "REVERT", + Depth: 1, + Gas: 59997, + Error: &errMsg, + Refund: &refundValue, // Refund still present but won't be applied + }, 2, 0, []uint32{0}, 0, 0, nil, &execution.StructLog{Op: "SSTORE", Depth: 1}) + + trace := &execution.TraceTransaction{ + Gas: 80000, + Failed: true, + } + + // Receipt gas for failed tx + receiptGas := uint64(50000) + frames := aggregator.Finalize(trace, receiptGas) + + assert.Equal(t, 1, countSummaryRows(frames)) + + summaryRow := getSummaryRow(frames, 0) + require.NotNil(t, summaryRow) + + // Error count should be 1 + assert.Equal(t, uint64(1), summaryRow.ErrorCount) + + // GasRefund should be nil for failed transactions + // Even though refund was accumulated (4800), it's not applied when tx fails + assert.Nil(t, summaryRow.GasRefund, "GasRefund should be nil for failed transactions") + + // IntrinsicGas SHOULD be computed for failed transactions + // Intrinsic gas is always charged before EVM execution begins + // Formula: intrinsic = receiptGas - gasCumulative + 0 (no refund for failed) + require.NotNil(t, summaryRow.IntrinsicGas, "IntrinsicGas should be computed for failed transactions") +} + +func TestFrameAggregator_SuccessfulTransaction_HasRefundAndIntrinsic(t *testing.T) { + // Test that successful transactions DO have gas_refund and intrinsic_gas set. 
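+	// This mirrors the failed-transaction test above, but the trace ends with a clean
+	// STOP, so the accumulated refund is applied and reported.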
+ aggregator := NewFrameAggregator() + + refundValue := uint64(4800) + + aggregator.ProcessStructlog(&execution.StructLog{ + Op: "PUSH1", + Depth: 1, + Gas: 80000, + }, 0, 0, []uint32{0}, 3, 3, nil, nil) + + // SSTORE that generates a refund + aggregator.ProcessStructlog(&execution.StructLog{ + Op: "SSTORE", + Depth: 1, + Gas: 79997, + Refund: &refundValue, + }, 1, 0, []uint32{0}, 20000, 20000, nil, &execution.StructLog{Op: "PUSH1", Depth: 1}) + + // Successful STOP + aggregator.ProcessStructlog(&execution.StructLog{ + Op: "STOP", + Depth: 1, + Gas: 59997, + Refund: &refundValue, + }, 2, 0, []uint32{0}, 0, 0, nil, &execution.StructLog{Op: "SSTORE", Depth: 1}) + + trace := &execution.TraceTransaction{ + Gas: 80000, + Failed: false, + } + + // For successful tx, receipt gas = gas_used (after refund applied) + // Let's say receipt shows 15200 gas used + frames := aggregator.Finalize(trace, 15200) + + assert.Equal(t, 1, countSummaryRows(frames)) + + summaryRow := getSummaryRow(frames, 0) + require.NotNil(t, summaryRow) + + // Error count should be 0 + assert.Equal(t, uint64(0), summaryRow.ErrorCount) + + // GasRefund should be set for successful transactions + require.NotNil(t, summaryRow.GasRefund, "GasRefund should be set for successful transactions") + assert.Equal(t, refundValue, *summaryRow.GasRefund) + + // IntrinsicGas should be computed for successful transactions + // (exact value depends on the computation, just verify it's not nil) + // Note: might be nil if computed value is 0, so we just check the logic is exercised +} + +func TestFrameAggregator_RevertWithoutOpcodeError(t *testing.T) { + // Test that REVERT transactions are correctly detected as failed even when + // the REVERT opcode itself has no error field set. + // + // REVERT is a successful opcode execution that causes transaction failure. + // Unlike "out of gas" errors where the opcode has an error field, REVERT + // executes successfully but reverts state changes. The failure is indicated + // by trace.Failed = true, NOT by individual opcode errors. 
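+	// (Contrast with TestFrameAggregator_ErrorCounting above, where the REVERT
+	// structlog carries an explicit error string.)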
+ aggregator := NewFrameAggregator() + + // Simulate a transaction that reverts: PUSH1 -> REVERT + aggregator.ProcessStructlog(&execution.StructLog{ + Op: "PUSH1", + Depth: 1, + Gas: 50000, + }, 0, 0, []uint32{0}, 3, 3, nil, nil) + + // REVERT opcode with NO error field (realistic behavior) + aggregator.ProcessStructlog(&execution.StructLog{ + Op: "REVERT", + Depth: 1, + Gas: 49997, + // Note: NO Error field set - REVERT executes successfully + }, 1, 0, []uint32{0}, 0, 0, nil, &execution.StructLog{Op: "PUSH1", Depth: 1}) + + // trace.Failed is true because the transaction reverted + trace := &execution.TraceTransaction{ + Gas: 50000, + Failed: true, // This is how REVERT is indicated + } + + frames := aggregator.Finalize(trace, 30000) + + assert.Equal(t, 1, countSummaryRows(frames)) + + summaryRow := getSummaryRow(frames, 0) + require.NotNil(t, summaryRow) + + // Error count MUST be 1 even though no opcode had an error field + // This is the key assertion: trace.Failed should set error_count = 1 + assert.Equal(t, uint64(1), summaryRow.ErrorCount, + "ErrorCount should be 1 for REVERT transactions even without opcode errors") + + // GasRefund should be nil for failed transactions + assert.Nil(t, summaryRow.GasRefund, + "GasRefund should be nil for reverted transactions") +} + +func TestMapOpcodeToCallType(t *testing.T) { + tests := []struct { + opcode string + expected string + }{ + {"CALL", "CALL"}, + {"CALLCODE", "CALLCODE"}, + {"DELEGATECALL", "DELEGATECALL"}, + {"STATICCALL", "STATICCALL"}, + {"CREATE", "CREATE"}, + {"CREATE2", "CREATE2"}, + {"PUSH1", "UNKNOWN"}, + {"STOP", "UNKNOWN"}, + } + + for _, tt := range tests { + t.Run(tt.opcode, func(t *testing.T) { + result := mapOpcodeToCallType(tt.opcode) + assert.Equal(t, tt.expected, result) + }) + } +} diff --git a/pkg/processor/transaction/structlog_agg/block_processing.go b/pkg/processor/transaction/structlog_agg/block_processing.go new file mode 100644 index 0000000..05c7773 --- /dev/null +++ b/pkg/processor/transaction/structlog_agg/block_processing.go @@ -0,0 +1,317 @@ +package structlog_agg + +import ( + "context" + "errors" + "fmt" + "math/big" + + "github.com/hibiken/asynq" + "github.com/sirupsen/logrus" + + "github.com/ethpandaops/execution-processor/pkg/common" + "github.com/ethpandaops/execution-processor/pkg/ethereum/execution" + "github.com/ethpandaops/execution-processor/pkg/processor/tracker" + "github.com/ethpandaops/execution-processor/pkg/state" +) + +// ProcessNextBlock processes the next available block(s). +// In zero-interval mode, this attempts to fetch and process multiple blocks +// up to the available capacity for improved throughput. 
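+// The flow is: look up the chain head, ask the state manager for the next block(s),
+// cap the batch by available capacity and the leash, fetch the blocks with a single
+// batch RPC, then process them in order, stopping at the first error.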
+func (p *Processor) ProcessNextBlock(ctx context.Context) error { + p.log.WithField("network", p.network.Name).Debug("Querying for next block to process") + + // Get current chain head for state manager (used when no history exists) + var chainHead *big.Int + + node := p.pool.GetHealthyExecutionNode() + + if node != nil { + if latestBlockNum, err := node.BlockNumber(ctx); err == nil && latestBlockNum != nil { + chainHead = new(big.Int).SetUint64(*latestBlockNum) + p.log.WithFields(logrus.Fields{ + "chain_head": chainHead.String(), + }).Debug("Retrieved chain head for state manager") + } + } + + // Get next block to determine starting point + nextBlock, err := p.stateManager.NextBlock(ctx, p.Name(), p.network.Name, p.processingMode, chainHead) + if err != nil { + if errors.Is(err, state.ErrNoMoreBlocks) { + p.log.Debug("no more blocks to process") + + return nil + } + + p.log.WithError(err).WithField("network", p.network.Name).Error("could not get next block") + + return err + } + + if nextBlock == nil { + p.log.Debug("no more blocks to process") + + return nil + } + + // Get available capacity for batch processing + capacity, err := p.GetAvailableCapacity(ctx, nextBlock.Uint64(), p.processingMode) + if err != nil { + p.log.WithError(err).Warn("Failed to get available capacity, falling back to single block") + + capacity = 1 + } + + if capacity <= 0 { + p.log.Debug("No capacity available, waiting for tasks to complete") + + return nil + } + + // Get batch of block numbers + blockNumbers, err := p.stateManager.NextBlocks(ctx, p.Name(), p.network.Name, p.processingMode, chainHead, capacity) + if err != nil { + p.log.WithError(err).Warn("Failed to get batch of block numbers, falling back to single block") + + blockNumbers = []*big.Int{nextBlock} + } + + if len(blockNumbers) == 0 { + p.log.Debug("No blocks to process") + + return nil + } + + // Validate batch won't exceed leash + if validateErr := p.ValidateBatchWithinLeash(ctx, blockNumbers[0].Uint64(), len(blockNumbers), p.processingMode); validateErr != nil { + p.log.WithError(validateErr).Warn("Batch validation failed, reducing to single block") + + blockNumbers = blockNumbers[:1] + } + + // Fetch blocks using batch RPC + blocks, err := node.BlocksByNumbers(ctx, blockNumbers) + if err != nil { + p.log.WithError(err).WithField("network", p.network.Name).Error("could not fetch blocks") + + return err + } + + if len(blocks) == 0 { + // No blocks returned - might be at chain tip + return p.handleBlockNotFound(ctx, node, nextBlock) + } + + p.log.WithFields(logrus.Fields{ + "requested": len(blockNumbers), + "received": len(blocks), + "network": p.network.Name, + }).Debug("Fetched batch of blocks") + + // Process each block, stopping on first error + for _, block := range blocks { + if processErr := p.processBlock(ctx, block); processErr != nil { + return processErr + } + } + + return nil +} + +// handleBlockNotFound handles the case when a block is not found. 
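+// If the missing block is within 5 blocks of the chain tip, the miss is treated as
+// expected (the node simply has not imported the block yet) and a "not yet available"
+// error is returned; otherwise a plain "block not found" error is returned.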
+func (p *Processor) handleBlockNotFound(ctx context.Context, node execution.Node, nextBlock *big.Int) error { + // Check if we're close to chain tip to determine if this is expected + if latestBlock, latestErr := node.BlockNumber(ctx); latestErr == nil && latestBlock != nil { + chainTip := new(big.Int).SetUint64(*latestBlock) + diff := new(big.Int).Sub(nextBlock, chainTip).Int64() + + if diff <= 5 { // Within 5 blocks of chain tip + p.log.WithFields(logrus.Fields{ + "network": p.network.Name, + "block_number": nextBlock, + "chain_tip": chainTip, + "blocks_ahead": diff, + }).Info("Waiting for block to be available on execution node") + + return fmt.Errorf("block %s not yet available (chain tip: %s)", nextBlock, chainTip) + } + } + + return fmt.Errorf("block %s not found", nextBlock) +} + +// processBlock processes a single block - the core logic extracted from the original ProcessNextBlock. +func (p *Processor) processBlock(ctx context.Context, block execution.Block) error { + blockNumber := block.Number() + + // Check if this block was recently processed to avoid rapid reprocessing + if recentlyProcessed, checkErr := p.stateManager.IsBlockRecentlyProcessed(ctx, blockNumber.Uint64(), p.network.Name, p.Name(), 30); checkErr == nil && recentlyProcessed { + p.log.WithFields(logrus.Fields{ + "block_number": blockNumber.String(), + "network": p.network.Name, + }).Debug("Block was recently processed, skipping to prevent rapid reprocessing") + + common.BlockProcessingSkipped.WithLabelValues(p.network.Name, p.Name(), "recently_processed").Inc() + + return nil + } + + // Update lightweight block height metric + common.BlockHeight.WithLabelValues(p.network.Name, ProcessorName).Set(float64(blockNumber.Int64())) + + p.log.WithFields(logrus.Fields{ + "block_number": blockNumber.String(), + "network": p.network.Name, + }).Debug("Processing block") + + // Handle empty blocks - mark complete immediately (no task tracking needed) + if len(block.Transactions()) == 0 { + p.log.WithFields(logrus.Fields{ + "network": p.network.Name, + "block_number": blockNumber, + }).Debug("skipping empty block") + + // Mark the block as complete immediately (no tasks to track) + if markErr := p.stateManager.MarkBlockComplete(ctx, blockNumber.Uint64(), p.network.Name, p.Name()); markErr != nil { + p.log.WithError(markErr).WithFields(logrus.Fields{ + "network": p.network.Name, + "block_number": blockNumber, + }).Error("could not mark empty block as complete") + + return markErr + } + + return nil + } + + // Calculate expected task count before enqueueing + expectedTaskCount := len(block.Transactions()) + + // Acquire exclusive lock on this block via Redis FIRST + if initErr := p.pendingTracker.InitBlock(ctx, blockNumber.Uint64(), expectedTaskCount, p.network.Name, p.Name(), p.processingMode); initErr != nil { + // If block is already being processed by another worker, skip gracefully + if errors.Is(initErr, tracker.ErrBlockAlreadyBeingProcessed) { + p.log.WithFields(logrus.Fields{ + "network": p.network.Name, + "block_number": blockNumber, + }).Debug("Block already being processed by another worker, skipping") + + common.BlockProcessingSkipped.WithLabelValues(p.network.Name, p.Name(), "already_processing").Inc() + + return nil + } + + p.log.WithError(initErr).WithFields(logrus.Fields{ + "network": p.network.Name, + "block_number": blockNumber, + }).Error("could not init block tracking in Redis") + + return initErr + } + + // Mark the block as enqueued AFTER acquiring Redis lock (phase 1 of two-phase completion) + if markErr := 
p.stateManager.MarkBlockEnqueued(ctx, blockNumber.Uint64(), expectedTaskCount, p.network.Name, p.Name()); markErr != nil { + p.log.WithError(markErr).WithFields(logrus.Fields{ + "network": p.network.Name, + "block_number": blockNumber, + }).Error("could not mark block as enqueued") + + // Clean up Redis lock since we failed to mark in ClickHouse + _ = p.pendingTracker.CleanupBlock(ctx, blockNumber.Uint64(), p.network.Name, p.Name(), p.processingMode) + + return markErr + } + + // Enqueue tasks for each transaction LAST + taskCount, err := p.EnqueueTransactionTasks(ctx, block) + if err != nil { + return fmt.Errorf("failed to enqueue transaction tasks: %w", err) + } + + // Log warning if actual count differs from expected + if taskCount != expectedTaskCount { + p.log.WithFields(logrus.Fields{ + "network": p.network.Name, + "block_number": blockNumber, + "expected_task_count": expectedTaskCount, + "actual_task_count": taskCount, + }).Warn("task count mismatch - some tasks may have failed to enqueue") + } + + p.log.WithFields(logrus.Fields{ + "network": p.network.Name, + "block_number": blockNumber, + "tx_count": len(block.Transactions()), + "task_count": taskCount, + }).Info("enqueued block for processing") + + return nil +} + +// EnqueueTransactionTasks enqueues transaction processing tasks for a given block. +func (p *Processor) EnqueueTransactionTasks(ctx context.Context, block execution.Block) (int, error) { + var enqueuedCount int + + var errs []error + + for index, tx := range block.Transactions() { + // Create process task payload + payload := &ProcessPayload{ + BlockNumber: *block.Number(), + TransactionHash: tx.Hash().String(), + TransactionIndex: uint32(index), //nolint:gosec // index is bounded by block.Transactions() length + NetworkName: p.network.Name, + Network: p.network.Name, + } + + // Create the task based on processing mode + var task *asynq.Task + + var queue string + + var taskType string + + var err error + + if p.processingMode == tracker.BACKWARDS_MODE { + task, err = NewProcessBackwardsTask(payload) + queue = p.getProcessBackwardsQueue() + taskType = ProcessBackwardsTaskType + } else { + task, err = NewProcessForwardsTask(payload) + queue = p.getProcessForwardsQueue() + taskType = ProcessForwardsTaskType + } + + if err != nil { + errs = append(errs, fmt.Errorf("failed to create task for tx %s: %w", tx.Hash().String(), err)) + + continue + } + + // Enqueue the task + if err := p.EnqueueTask(ctx, task, asynq.Queue(queue)); err != nil { + errs = append(errs, fmt.Errorf("failed to enqueue task for tx %s: %w", tx.Hash().String(), err)) + + continue + } + + enqueuedCount++ + + common.TasksEnqueued.WithLabelValues(p.network.Name, ProcessorName, queue, taskType).Inc() + } + + p.log.WithFields(logrus.Fields{ + "block_number": block.Number(), + "total_txs": len(block.Transactions()), + "enqueued_count": enqueuedCount, + "error_count": len(errs), + }).Info("Enqueued transaction processing tasks") + + if len(errs) > 0 { + return enqueuedCount, fmt.Errorf("failed to enqueue %d tasks: %v", len(errs), errs[0]) + } + + return enqueuedCount, nil +} diff --git a/pkg/processor/transaction/structlog_agg/columns.go b/pkg/processor/transaction/structlog_agg/columns.go new file mode 100644 index 0000000..d301175 --- /dev/null +++ b/pkg/processor/transaction/structlog_agg/columns.go @@ -0,0 +1,182 @@ +package structlog_agg + +import ( + "time" + + "github.com/ClickHouse/ch-go/proto" +) + +// ClickHouseTime wraps time.Time for ClickHouse DateTime formatting. 
+type ClickHouseTime time.Time + +// NewClickHouseTime creates a new ClickHouseTime from time.Time. +func NewClickHouseTime(t time.Time) ClickHouseTime { + return ClickHouseTime(t) +} + +// Time returns the underlying time.Time. +func (t ClickHouseTime) Time() time.Time { + return time.Time(t) +} + +// Columns holds all columns for structlog_agg batch insert using ch-go columnar protocol. +type Columns struct { + UpdatedDateTime proto.ColDateTime + BlockNumber proto.ColUInt64 + TransactionHash proto.ColStr + TransactionIndex proto.ColUInt32 + CallFrameID proto.ColUInt32 + ParentCallFrameID *proto.ColNullable[uint32] + CallFramePath *proto.ColArr[uint32] // Path from root to this frame + Depth proto.ColUInt32 + TargetAddress *proto.ColNullable[string] + CallType proto.ColStr + Operation proto.ColStr // Empty string for summary row, opcode name for per-opcode rows + OpcodeCount proto.ColUInt64 + ErrorCount proto.ColUInt64 + Gas proto.ColUInt64 // SUM(gas_self) - excludes child frame gas + GasCumulative proto.ColUInt64 // For summary: frame gas_cumulative; for per-opcode: SUM(gas_used) + MinDepth proto.ColUInt32 // Per-opcode: MIN(depth); summary: same as Depth + MaxDepth proto.ColUInt32 // Per-opcode: MAX(depth); summary: same as Depth + GasRefund *proto.ColNullable[uint64] + IntrinsicGas *proto.ColNullable[uint64] + MetaNetworkName proto.ColStr +} + +// NewColumns creates a new Columns instance with all columns initialized. +func NewColumns() *Columns { + return &Columns{ + ParentCallFrameID: new(proto.ColUInt32).Nullable(), + CallFramePath: new(proto.ColUInt32).Array(), + TargetAddress: new(proto.ColStr).Nullable(), + GasRefund: new(proto.ColUInt64).Nullable(), + IntrinsicGas: new(proto.ColUInt64).Nullable(), + } +} + +// Append adds a row to all columns. +func (c *Columns) Append( + updatedDateTime time.Time, + blockNumber uint64, + txHash string, + txIndex uint32, + callFrameID uint32, + parentCallFrameID *uint32, + callFramePath []uint32, + depth uint32, + targetAddress *string, + callType string, + operation string, + opcodeCount uint64, + errorCount uint64, + gas uint64, + gasCumulative uint64, + minDepth uint32, + maxDepth uint32, + gasRefund *uint64, + intrinsicGas *uint64, + network string, +) { + c.UpdatedDateTime.Append(updatedDateTime) + c.BlockNumber.Append(blockNumber) + c.TransactionHash.Append(txHash) + c.TransactionIndex.Append(txIndex) + c.CallFrameID.Append(callFrameID) + c.ParentCallFrameID.Append(nullableUint32(parentCallFrameID)) + c.CallFramePath.Append(callFramePath) + c.Depth.Append(depth) + c.TargetAddress.Append(nullableStr(targetAddress)) + c.CallType.Append(callType) + c.Operation.Append(operation) + c.OpcodeCount.Append(opcodeCount) + c.ErrorCount.Append(errorCount) + c.Gas.Append(gas) + c.GasCumulative.Append(gasCumulative) + c.MinDepth.Append(minDepth) + c.MaxDepth.Append(maxDepth) + c.GasRefund.Append(nullableUint64(gasRefund)) + c.IntrinsicGas.Append(nullableUint64(intrinsicGas)) + c.MetaNetworkName.Append(network) +} + +// Reset clears all columns for reuse. 
+func (c *Columns) Reset() { + c.UpdatedDateTime.Reset() + c.BlockNumber.Reset() + c.TransactionHash.Reset() + c.TransactionIndex.Reset() + c.CallFrameID.Reset() + c.ParentCallFrameID.Reset() + c.CallFramePath.Reset() + c.Depth.Reset() + c.TargetAddress.Reset() + c.CallType.Reset() + c.Operation.Reset() + c.OpcodeCount.Reset() + c.ErrorCount.Reset() + c.Gas.Reset() + c.GasCumulative.Reset() + c.MinDepth.Reset() + c.MaxDepth.Reset() + c.GasRefund.Reset() + c.IntrinsicGas.Reset() + c.MetaNetworkName.Reset() +} + +// Input returns the proto.Input for inserting data. +func (c *Columns) Input() proto.Input { + return proto.Input{ + {Name: "updated_date_time", Data: &c.UpdatedDateTime}, + {Name: "block_number", Data: &c.BlockNumber}, + {Name: "transaction_hash", Data: &c.TransactionHash}, + {Name: "transaction_index", Data: &c.TransactionIndex}, + {Name: "call_frame_id", Data: &c.CallFrameID}, + {Name: "parent_call_frame_id", Data: c.ParentCallFrameID}, + {Name: "call_frame_path", Data: c.CallFramePath}, + {Name: "depth", Data: &c.Depth}, + {Name: "target_address", Data: c.TargetAddress}, + {Name: "call_type", Data: &c.CallType}, + {Name: "operation", Data: &c.Operation}, + {Name: "opcode_count", Data: &c.OpcodeCount}, + {Name: "error_count", Data: &c.ErrorCount}, + {Name: "gas", Data: &c.Gas}, + {Name: "gas_cumulative", Data: &c.GasCumulative}, + {Name: "min_depth", Data: &c.MinDepth}, + {Name: "max_depth", Data: &c.MaxDepth}, + {Name: "gas_refund", Data: c.GasRefund}, + {Name: "intrinsic_gas", Data: c.IntrinsicGas}, + {Name: "meta_network_name", Data: &c.MetaNetworkName}, + } +} + +// Rows returns the number of rows in the columns. +func (c *Columns) Rows() int { + return c.BlockNumber.Rows() +} + +// nullableStr converts a *string to proto.Nullable[string]. +func nullableStr(s *string) proto.Nullable[string] { + if s == nil { + return proto.Null[string]() + } + + return proto.NewNullable(*s) +} + +// nullableUint32 converts a *uint32 to proto.Nullable[uint32]. +func nullableUint32(v *uint32) proto.Nullable[uint32] { + if v == nil { + return proto.Null[uint32]() + } + + return proto.NewNullable(*v) +} + +// nullableUint64 converts a *uint64 to proto.Nullable[uint64]. +func nullableUint64(v *uint64) proto.Nullable[uint64] { + if v == nil { + return proto.Null[uint64]() + } + + return proto.NewNullable(*v) +} diff --git a/pkg/processor/transaction/structlog_agg/config.go b/pkg/processor/transaction/structlog_agg/config.go new file mode 100644 index 0000000..3e0af8b --- /dev/null +++ b/pkg/processor/transaction/structlog_agg/config.go @@ -0,0 +1,46 @@ +package structlog_agg + +import ( + "fmt" + "time" + + "github.com/ethpandaops/execution-processor/pkg/clickhouse" +) + +// Default buffer configuration values. +const ( + DefaultBufferMaxRows = 100000 + DefaultBufferFlushInterval = time.Second +) + +// Config holds configuration for transaction structlog_agg processor. +type Config struct { + clickhouse.Config `yaml:",inline"` + Enabled bool `yaml:"enabled"` + Table string `yaml:"table"` + + // Row buffer settings for batched ClickHouse inserts + BufferMaxRows int `yaml:"bufferMaxRows"` // Max rows before flush. Default: 100000 + BufferFlushInterval time.Duration `yaml:"bufferFlushInterval"` // Max time before flush. Default: 1s + + // Block completion tracking + MaxPendingBlockRange int `yaml:"maxPendingBlockRange"` // Max distance between oldest incomplete and current block. Default: 2 +} + +// Validate validates the configuration. 
+func (c *Config) Validate() error { + if !c.Enabled { + return nil + } + + // Validate the embedded clickhouse config + if err := c.Config.Validate(); err != nil { + return fmt.Errorf("clickhouse config validation failed: %w", err) + } + + if c.Table == "" { + return fmt.Errorf("transaction structlog_agg table is required when enabled") + } + + return nil +} diff --git a/pkg/processor/transaction/structlog_agg/handlers.go b/pkg/processor/transaction/structlog_agg/handlers.go new file mode 100644 index 0000000..e6bb887 --- /dev/null +++ b/pkg/processor/transaction/structlog_agg/handlers.go @@ -0,0 +1,137 @@ +package structlog_agg + +import ( + "context" + "fmt" + "time" + + "github.com/hibiken/asynq" + "github.com/sirupsen/logrus" + + "github.com/ethpandaops/execution-processor/pkg/common" + "github.com/ethpandaops/execution-processor/pkg/processor/tracker" +) + +// handleProcessForwardsTask handles the forwards processing of a single transaction. +func (p *Processor) handleProcessForwardsTask(ctx context.Context, task *asynq.Task) error { + start := time.Now() + + defer func() { + duration := time.Since(start) + common.TaskProcessingDuration.WithLabelValues(p.network.Name, ProcessorName, tracker.ProcessForwardsQueue(ProcessorName), ProcessForwardsTaskType).Observe(duration.Seconds()) + }() + + var payload ProcessPayload + if err := payload.UnmarshalBinary(task.Payload()); err != nil { + common.TasksErrored.WithLabelValues(p.network.Name, ProcessorName, tracker.ProcessForwardsQueue(ProcessorName), ProcessForwardsTaskType, "unmarshal_error").Inc() + + return fmt.Errorf("failed to unmarshal process payload: %w", err) + } + + // Get healthy execution node + node := p.pool.GetHealthyExecutionNode() + if node == nil { + return fmt.Errorf("no healthy execution node available") + } + + // Get block data + blockNumber := &payload.BlockNumber + + block, err := node.BlockByNumber(ctx, blockNumber) + if err != nil { + return fmt.Errorf("failed to get block: %w", err) + } + + // Find the transaction in the block + if int(payload.TransactionIndex) >= len(block.Transactions()) { + return fmt.Errorf("transaction index %d out of range for block %s", payload.TransactionIndex, payload.BlockNumber.String()) + } + + tx := block.Transactions()[payload.TransactionIndex] + if tx.Hash().String() != payload.TransactionHash { + return fmt.Errorf("transaction hash mismatch: expected %s, got %s", payload.TransactionHash, tx.Hash().String()) + } + + // Process transaction - aggregate call frames + frameCount, err := p.ProcessTransaction(ctx, block, int(payload.TransactionIndex), tx) + if err != nil { + common.TasksErrored.WithLabelValues(p.network.Name, ProcessorName, tracker.ProcessForwardsQueue(ProcessorName), ProcessForwardsTaskType, "processing_error").Inc() + + return fmt.Errorf("failed to process transaction: %w", err) + } + + // Record successful processing + common.TasksProcessed.WithLabelValues(p.network.Name, ProcessorName, tracker.ProcessForwardsQueue(ProcessorName), ProcessForwardsTaskType, "success").Inc() + + // Track block completion using embedded Limiter + p.TrackBlockCompletion(ctx, blockNumber.Uint64(), tracker.FORWARDS_MODE) + + p.log.WithFields(logrus.Fields{ + "transaction_hash": payload.TransactionHash, + "frame_count": frameCount, + }).Debug("Processed transaction") + + return nil +} + +// handleProcessBackwardsTask handles the backwards processing of a single transaction. 
+func (p *Processor) handleProcessBackwardsTask(ctx context.Context, task *asynq.Task) error { + start := time.Now() + + defer func() { + duration := time.Since(start) + common.TaskProcessingDuration.WithLabelValues(p.network.Name, ProcessorName, tracker.ProcessBackwardsQueue(ProcessorName), ProcessBackwardsTaskType).Observe(duration.Seconds()) + }() + + var payload ProcessPayload + if err := payload.UnmarshalBinary(task.Payload()); err != nil { + common.TasksErrored.WithLabelValues(p.network.Name, ProcessorName, tracker.ProcessBackwardsQueue(ProcessorName), ProcessBackwardsTaskType, "unmarshal_error").Inc() + + return fmt.Errorf("failed to unmarshal process payload: %w", err) + } + + // Get healthy execution node + node := p.pool.GetHealthyExecutionNode() + if node == nil { + return fmt.Errorf("no healthy execution node available") + } + + // Get block data + blockNumber := &payload.BlockNumber + + block, err := node.BlockByNumber(ctx, blockNumber) + if err != nil { + return fmt.Errorf("failed to get block: %w", err) + } + + // Find the transaction in the block + if int(payload.TransactionIndex) >= len(block.Transactions()) { + return fmt.Errorf("transaction index %d out of range for block %s", payload.TransactionIndex, payload.BlockNumber.String()) + } + + tx := block.Transactions()[payload.TransactionIndex] + if tx.Hash().String() != payload.TransactionHash { + return fmt.Errorf("transaction hash mismatch: expected %s, got %s", payload.TransactionHash, tx.Hash().String()) + } + + // Process transaction - aggregate call frames + frameCount, err := p.ProcessTransaction(ctx, block, int(payload.TransactionIndex), tx) + if err != nil { + common.TasksErrored.WithLabelValues(p.network.Name, ProcessorName, tracker.ProcessBackwardsQueue(ProcessorName), ProcessBackwardsTaskType, "processing_error").Inc() + + return fmt.Errorf("failed to process transaction: %w", err) + } + + // Record successful processing + common.TasksProcessed.WithLabelValues(p.network.Name, ProcessorName, tracker.ProcessBackwardsQueue(ProcessorName), ProcessBackwardsTaskType, "success").Inc() + + // Track block completion using embedded Limiter + p.TrackBlockCompletion(ctx, blockNumber.Uint64(), tracker.BACKWARDS_MODE) + + p.log.WithFields(logrus.Fields{ + "transaction_hash": payload.TransactionHash, + "frame_count": frameCount, + }).Debug("Processed transaction") + + return nil +} diff --git a/pkg/processor/transaction/structlog_agg/processor.go b/pkg/processor/transaction/structlog_agg/processor.go new file mode 100644 index 0000000..2e3a732 --- /dev/null +++ b/pkg/processor/transaction/structlog_agg/processor.go @@ -0,0 +1,406 @@ +package structlog_agg + +import ( + "context" + "fmt" + "math" + "math/big" + "sync" + "time" + + "github.com/ClickHouse/ch-go" + "github.com/hibiken/asynq" + "github.com/redis/go-redis/v9" + "github.com/sirupsen/logrus" + + "github.com/ethpandaops/execution-processor/pkg/clickhouse" + "github.com/ethpandaops/execution-processor/pkg/common" + "github.com/ethpandaops/execution-processor/pkg/ethereum" + "github.com/ethpandaops/execution-processor/pkg/processor/tracker" + "github.com/ethpandaops/execution-processor/pkg/rowbuffer" + "github.com/ethpandaops/execution-processor/pkg/state" +) + +const ( + metricsUpdateInterval = 15 * time.Second +) + +// Compile-time interface compliance check. +var _ tracker.BlockProcessor = (*Processor)(nil) + +// Dependencies contains the dependencies needed for the processor. 
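+//
+// Illustrative wiring sketch: all values (log, pool, network, stateManager,
+// asynqClient, redisClient, the prefix and table name) are placeholders
+// supplied by the host application, and the embedded ClickHouse settings are
+// omitted for brevity:
+//
+//	proc, err := New(&Dependencies{
+//		Log:         log,
+//		Pool:        pool,
+//		Network:     network,
+//		State:       stateManager,
+//		AsynqClient: asynqClient,
+//		RedisClient: redisClient,
+//		RedisPrefix: "example-prefix",
+//	}, &Config{
+//		Enabled: true,
+//		Table:   "my_structlog_agg_table",
+//	})
+//	if err != nil {
+//		return err
+//	}
+//
+//	if err := proc.Start(ctx); err != nil {
+//		return err
+//	}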
+type Dependencies struct { + Log logrus.FieldLogger + Pool *ethereum.Pool + Network *ethereum.Network + State *state.Manager + AsynqClient *asynq.Client + RedisClient *redis.Client + RedisPrefix string +} + +// insertRow wraps CallFrameRow with additional context needed for batched inserts. +type insertRow struct { + Frame CallFrameRow + BlockNumber uint64 + TxHash string + TxIndex uint32 + UpdatedAt time.Time + Network string +} + +// Processor handles transaction structlog_agg processing. +type Processor struct { + log logrus.FieldLogger + pool *ethereum.Pool + stateManager *state.Manager + clickhouse clickhouse.ClientInterface + config *Config + network *ethereum.Network + asynqClient *asynq.Client + processingMode string + redisPrefix string + pendingTracker *tracker.PendingTracker + + // Row buffer for batched ClickHouse inserts + rowBuffer *rowbuffer.Buffer[insertRow] + + // Embedded limiter for shared blocking/completion logic + *tracker.Limiter + + // Background metrics worker fields + metricsStop chan struct{} + metricsWg sync.WaitGroup + metricsStarted bool + metricsStartedMu sync.Mutex +} + +// New creates a new transaction structlog_agg processor. +func New(deps *Dependencies, config *Config) (*Processor, error) { + // Create a copy of the embedded config and set processor-specific values + clickhouseConfig := config.Config + clickhouseConfig.Network = deps.Network.Name + clickhouseConfig.Processor = ProcessorName + + clickhouseClient, err := clickhouse.New(&clickhouseConfig) + if err != nil { + return nil, fmt.Errorf("failed to create clickhouse client for transaction_structlog_agg: %w", err) + } + + // Set default for MaxPendingBlockRange + if config.MaxPendingBlockRange <= 0 { + config.MaxPendingBlockRange = tracker.DefaultMaxPendingBlockRange + } + + // Set buffer defaults + if config.BufferMaxRows <= 0 { + config.BufferMaxRows = DefaultBufferMaxRows + } + + if config.BufferFlushInterval <= 0 { + config.BufferFlushInterval = DefaultBufferFlushInterval + } + + log := deps.Log.WithField("processor", ProcessorName) + pendingTracker := tracker.NewPendingTracker(deps.RedisClient, deps.RedisPrefix, log) + + // Create the limiter for shared functionality + limiter := tracker.NewLimiter( + &tracker.LimiterDeps{ + Log: log, + StateProvider: deps.State, + PendingTracker: pendingTracker, + Network: deps.Network.Name, + Processor: ProcessorName, + }, + tracker.LimiterConfig{ + MaxPendingBlockRange: config.MaxPendingBlockRange, + }, + ) + + processor := &Processor{ + log: log, + pool: deps.Pool, + stateManager: deps.State, + clickhouse: clickhouseClient, + config: config, + asynqClient: deps.AsynqClient, + processingMode: tracker.FORWARDS_MODE, // Default mode + redisPrefix: deps.RedisPrefix, + pendingTracker: pendingTracker, + Limiter: limiter, + } + + processor.network = deps.Network + + // Create the row buffer with the flush function + processor.rowBuffer = rowbuffer.New( + rowbuffer.Config{ + MaxRows: config.BufferMaxRows, + FlushInterval: config.BufferFlushInterval, + Network: deps.Network.Name, + Processor: ProcessorName, + Table: config.Table, + }, + processor.flushRows, + log, + ) + + processor.log.WithFields(logrus.Fields{ + "network": processor.network.Name, + "max_pending_block_range": config.MaxPendingBlockRange, + "buffer_max_rows": config.BufferMaxRows, + "buffer_flush_interval": config.BufferFlushInterval, + }).Info("Detected network") + + return processor, nil +} + +// Start starts the processor. 
+func (p *Processor) Start(ctx context.Context) error { + // Start the ClickHouse client + if err := p.clickhouse.Start(); err != nil { + return fmt.Errorf("failed to start ClickHouse client: %w", err) + } + + // Start the row buffer + if err := p.rowBuffer.Start(ctx); err != nil { + return fmt.Errorf("failed to start row buffer: %w", err) + } + + // Start the background metrics worker + p.startMetricsWorker() + + p.log.Info("Transaction structlog_agg processor ready") + + return nil +} + +// Stop stops the processor. +func (p *Processor) Stop(ctx context.Context) error { + p.log.Info("Stopping transaction structlog_agg processor") + + // Stop the background metrics worker + p.stopMetricsWorker() + + // Stop the row buffer first (flushes remaining rows) + if err := p.rowBuffer.Stop(ctx); err != nil { + p.log.WithError(err).Error("Failed to stop row buffer") + } + + // Stop the ClickHouse client + return p.clickhouse.Stop() +} + +// Name returns the processor name. +func (p *Processor) Name() string { + return ProcessorName +} + +// GetQueues returns the queues used by this processor. +func (p *Processor) GetQueues() []tracker.QueueInfo { + return []tracker.QueueInfo{ + { + Name: tracker.PrefixedProcessForwardsQueue(ProcessorName, p.redisPrefix), + Priority: 10, // Highest priority for forwards processing + }, + { + Name: tracker.PrefixedProcessBackwardsQueue(ProcessorName, p.redisPrefix), + Priority: 5, // Medium priority for backwards processing + }, + } +} + +// GetHandlers returns the task handlers for this processor. +func (p *Processor) GetHandlers() map[string]asynq.HandlerFunc { + return map[string]asynq.HandlerFunc{ + ProcessForwardsTaskType: p.handleProcessForwardsTask, + ProcessBackwardsTaskType: p.handleProcessBackwardsTask, + } +} + +// EnqueueTask enqueues a task to the specified queue with infinite retries. +func (p *Processor) EnqueueTask(ctx context.Context, task *asynq.Task, opts ...asynq.Option) error { + opts = append(opts, asynq.MaxRetry(math.MaxInt32)) + + _, err := p.asynqClient.EnqueueContext(ctx, task, opts...) + + return err +} + +// SetProcessingMode sets the processing mode for the processor. +func (p *Processor) SetProcessingMode(mode string) { + p.processingMode = mode + p.log.WithField("mode", mode).Info("Processing mode updated") +} + +// getProcessForwardsQueue returns the prefixed process forwards queue name. +func (p *Processor) getProcessForwardsQueue() string { + return tracker.PrefixedProcessForwardsQueue(ProcessorName, p.redisPrefix) +} + +// getProcessBackwardsQueue returns the prefixed process backwards queue name. +func (p *Processor) getProcessBackwardsQueue() string { + return tracker.PrefixedProcessBackwardsQueue(ProcessorName, p.redisPrefix) +} + +// flushRows is the flush function for the row buffer. 
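+//
+// A single flush covers rows submitted by many concurrent tasks since the
+// previous flush and writes them in one columnar INSERT; if the INSERT fails,
+// the same error is propagated back to every Submit call that contributed
+// rows to the failed batch.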
+func (p *Processor) flushRows(ctx context.Context, rows []insertRow) error { + if len(rows) == 0 { + return nil + } + + // Add timeout for ClickHouse operation + insertCtx, cancel := context.WithTimeout(ctx, tracker.DefaultClickHouseTimeout) + defer cancel() + + cols := NewColumns() + + for _, row := range rows { + cols.Append( + row.UpdatedAt, + row.BlockNumber, + row.TxHash, + row.TxIndex, + row.Frame.CallFrameID, + row.Frame.ParentCallFrameID, + row.Frame.CallFramePath, + row.Frame.Depth, + row.Frame.TargetAddress, + row.Frame.CallType, + row.Frame.Operation, + row.Frame.OpcodeCount, + row.Frame.ErrorCount, + row.Frame.Gas, + row.Frame.GasCumulative, + row.Frame.MinDepth, + row.Frame.MaxDepth, + row.Frame.GasRefund, + row.Frame.IntrinsicGas, + row.Network, + ) + } + + input := cols.Input() + + if err := p.clickhouse.Do(insertCtx, ch.Query{ + Body: input.Into(p.config.Table), + Input: input, + }); err != nil { + common.ClickHouseInsertsRows.WithLabelValues( + p.network.Name, ProcessorName, p.config.Table, "failed", "", + ).Add(float64(len(rows))) + + return fmt.Errorf("failed to insert call frames: %w", err) + } + + common.ClickHouseInsertsRows.WithLabelValues( + p.network.Name, ProcessorName, p.config.Table, "success", "", + ).Add(float64(len(rows))) + + return nil +} + +// insertCallFrames submits call frames to the row buffer for batched insertion. +func (p *Processor) insertCallFrames(ctx context.Context, frames []CallFrameRow, blockNumber uint64, txHash string, txIndex uint32, now time.Time) error { + if len(frames) == 0 { + return nil + } + + // Convert to insertRow slice + rows := make([]insertRow, len(frames)) + for i, frame := range frames { + rows[i] = insertRow{ + Frame: frame, + BlockNumber: blockNumber, + TxHash: txHash, + TxIndex: txIndex, + UpdatedAt: now, + Network: p.network.Name, + } + } + + return p.rowBuffer.Submit(ctx, rows) +} + +// startMetricsWorker starts the background metrics update worker. +func (p *Processor) startMetricsWorker() { + p.metricsStartedMu.Lock() + defer p.metricsStartedMu.Unlock() + + if p.metricsStarted { + return + } + + p.metricsStarted = true + p.metricsStop = make(chan struct{}) + p.metricsWg.Add(1) + + go p.runMetricsWorker() +} + +// stopMetricsWorker stops the background metrics update worker. +func (p *Processor) stopMetricsWorker() { + p.metricsStartedMu.Lock() + defer p.metricsStartedMu.Unlock() + + if !p.metricsStarted { + return + } + + close(p.metricsStop) + p.metricsWg.Wait() + p.metricsStarted = false +} + +// runMetricsWorker runs the background metrics update loop. +func (p *Processor) runMetricsWorker() { + defer p.metricsWg.Done() + + ticker := time.NewTicker(metricsUpdateInterval) + defer ticker.Stop() + + // Do initial update + p.updateMetricsBackground() + + for { + select { + case <-p.metricsStop: + return + case <-ticker.C: + p.updateMetricsBackground() + } + } +} + +// updateMetricsBackground updates expensive metrics in the background. 
+func (p *Processor) updateMetricsBackground() { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + // Update blocks stored min/max + minBlock, maxBlock, err := p.stateManager.GetMinMaxStoredBlocks(ctx, p.network.Name, ProcessorName) + if err != nil { + p.log.WithError(err).WithField("network", p.network.Name).Debug("failed to get min/max stored blocks") + } else if minBlock != nil && maxBlock != nil { + common.BlocksStored.WithLabelValues(p.network.Name, ProcessorName, "min").Set(float64(minBlock.Int64())) + common.BlocksStored.WithLabelValues(p.network.Name, ProcessorName, "max").Set(float64(maxBlock.Int64())) + } + + // Update head distance metric + node := p.pool.GetHealthyExecutionNode() + + if node != nil { + if latestBlockNum, err := node.BlockNumber(ctx); err == nil && latestBlockNum != nil { + executionHead := new(big.Int).SetUint64(*latestBlockNum) + + distance, headType, err := p.stateManager.GetHeadDistance(ctx, ProcessorName, p.network.Name, p.processingMode, executionHead) + if err != nil { + p.log.WithError(err).Debug("Failed to calculate head distance in background metrics") + common.HeadDistance.WithLabelValues(p.network.Name, ProcessorName, "error").Set(-1) + } else { + common.HeadDistance.WithLabelValues(p.network.Name, ProcessorName, headType).Set(float64(distance)) + } + } + } +} diff --git a/pkg/processor/transaction/structlog_agg/tasks.go b/pkg/processor/transaction/structlog_agg/tasks.go new file mode 100644 index 0000000..ef40735 --- /dev/null +++ b/pkg/processor/transaction/structlog_agg/tasks.go @@ -0,0 +1,62 @@ +package structlog_agg + +import ( + "encoding/json" + "math/big" + + "github.com/hibiken/asynq" + + "github.com/ethpandaops/execution-processor/pkg/processor/tracker" +) + +const ( + ProcessorName = "transaction_structlog_agg" + ProcessForwardsTaskType = "transaction_structlog_agg_process_forwards" + ProcessBackwardsTaskType = "transaction_structlog_agg_process_backwards" +) + +// ProcessPayload represents the payload for processing a transaction. +// +//nolint:tagliatelle // Using snake_case for backwards compatibility +type ProcessPayload struct { + BlockNumber big.Int `json:"block_number"` + TransactionHash string `json:"transaction_hash"` + TransactionIndex uint32 `json:"transaction_index"` + NetworkName string `json:"network_name"` + Network string `json:"network"` // Alias for NetworkName + ProcessingMode string `json:"processing_mode"` // "forwards" or "backwards" +} + +// MarshalBinary implements encoding.BinaryMarshaler. +func (p *ProcessPayload) MarshalBinary() ([]byte, error) { + return json.Marshal(p) +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. +func (p *ProcessPayload) UnmarshalBinary(data []byte) error { + return json.Unmarshal(data, p) +} + +// NewProcessForwardsTask creates a new forwards process task. +func NewProcessForwardsTask(payload *ProcessPayload) (*asynq.Task, error) { + payload.ProcessingMode = tracker.FORWARDS_MODE + + data, err := json.Marshal(payload) + if err != nil { + return nil, err + } + + return asynq.NewTask(ProcessForwardsTaskType, data), nil +} + +// NewProcessBackwardsTask creates a new backwards process task. 
+func NewProcessBackwardsTask(payload *ProcessPayload) (*asynq.Task, error) { + payload.ProcessingMode = tracker.BACKWARDS_MODE + + data, err := json.Marshal(payload) + if err != nil { + return nil, err + } + + return asynq.NewTask(ProcessBackwardsTaskType, data), nil +} diff --git a/pkg/processor/transaction/structlog_agg/transaction_processing.go b/pkg/processor/transaction/structlog_agg/transaction_processing.go new file mode 100644 index 0000000..34a3020 --- /dev/null +++ b/pkg/processor/transaction/structlog_agg/transaction_processing.go @@ -0,0 +1,457 @@ +package structlog_agg + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/sirupsen/logrus" + + pcommon "github.com/ethpandaops/execution-processor/pkg/common" + "github.com/ethpandaops/execution-processor/pkg/ethereum/execution" + "github.com/ethpandaops/execution-processor/pkg/processor/tracker" + "github.com/ethpandaops/execution-processor/pkg/processor/transaction/structlog" +) + +// Opcode constants for call and create operations. +const ( + opcodeCALL = "CALL" + opcodeCALLCODE = "CALLCODE" + opcodeDELEGATECALL = "DELEGATECALL" + opcodeSTATICCALL = "STATICCALL" + opcodeCREATE = "CREATE" + opcodeCREATE2 = "CREATE2" +) + +// ProcessTransaction processes a transaction and inserts aggregated call frame data to ClickHouse. +func (p *Processor) ProcessTransaction(ctx context.Context, block execution.Block, index int, tx execution.Transaction) (int, error) { + start := time.Now() + + defer func() { + duration := time.Since(start) + pcommon.TransactionProcessingDuration.WithLabelValues(p.network.Name, "structlog_agg").Observe(duration.Seconds()) + }() + + // Get trace from execution node + trace, err := p.getTransactionTrace(ctx, tx, block) + if err != nil { + return 0, fmt.Errorf("failed to get trace: %w", err) + } + + if len(trace.Structlogs) == 0 { + // No structlogs means no EVM execution (e.g., simple ETH transfer) + // We still emit a root frame for consistency, matching SQL simple_transfer_frames logic + rootFrame := CallFrameRow{ + CallFrameID: 0, + CallFramePath: []uint32{0}, + Depth: 0, + CallType: "", // Root frame has no initiating CALL opcode + Operation: "", // Summary row + OpcodeCount: 0, + Gas: 0, + GasCumulative: 0, + MinDepth: 0, + MaxDepth: 0, + } + + // Set target_address from transaction's to_address + if tx.To() != nil { + addr := tx.To().Hex() + rootFrame.TargetAddress = &addr + } + + // Set error_count: 1 if transaction failed, 0 otherwise + if trace.Failed { + rootFrame.ErrorCount = 1 + } else { + rootFrame.ErrorCount = 0 + } + + // For simple transfers (no EVM execution), all gas is intrinsic. + // This is true for both successful and failed transactions. 
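+		// For example, a plain ETH transfer reports trace.Gas == 21000, so the
+		// single root frame is emitted with intrinsic_gas = 21000, gas = 0 and
+		// opcode_count = 0 (illustrative figures for a standard transfer).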
+ intrinsicGas := trace.Gas + rootFrame.IntrinsicGas = &intrinsicGas + + // gas_refund is NULL for simple transfers (no SSTORE operations) + + if err := p.insertCallFrames(ctx, []CallFrameRow{rootFrame}, block.Number().Uint64(), tx.Hash().String(), uint32(index), time.Now()); err != nil { //nolint:gosec // index is bounded by block.Transactions() length + pcommon.TransactionsProcessed.WithLabelValues(p.network.Name, "structlog_agg", "failed").Inc() + + return 0, fmt.Errorf("failed to insert call frames: %w", err) + } + + pcommon.TransactionsProcessed.WithLabelValues(p.network.Name, "structlog_agg", "success").Inc() + + return 1, nil + } + + // Check if GasUsed is pre-computed by the tracer (embedded mode) + precomputedGasUsed := hasPrecomputedGasUsed(trace.Structlogs) + + var gasUsed []uint64 + if !precomputedGasUsed { + gasUsed = computeGasUsed(trace.Structlogs) + } else { + // Extract pre-computed GasUsed values from structlogs (embedded mode) + gasUsed = make([]uint64, len(trace.Structlogs)) + for i := range trace.Structlogs { + gasUsed[i] = trace.Structlogs[i].GasUsed + } + } + + // Compute gasSelf: gas excluding child frame gas (for CALL/CREATE opcodes) + gasSelf := structlog.ComputeGasSelf(trace.Structlogs, gasUsed) + + // Check if CREATE/CREATE2 addresses are pre-computed by the tracer + precomputedCreateAddresses := hasPrecomputedCreateAddresses(trace.Structlogs) + + var createAddresses map[int]*string + if !precomputedCreateAddresses { + createAddresses = computeCreateAddresses(trace.Structlogs) + } + + // Initialize frame aggregator + aggregator := NewFrameAggregator() + + // Initialize call frame tracker (reusing the same logic as structlog processor) + callTracker := newCallTracker() + + // Process all structlogs + var prevStructlog *execution.StructLog + + for i := range trace.Structlogs { + sl := &trace.Structlogs[i] + + // Track call frame based on depth changes + frameID, framePath := callTracker.processDepthChange(sl.Depth) + + // Get call target address + callToAddr := p.extractCallAddressWithCreate(sl, i, createAddresses) + + // Process this structlog into the aggregator + aggregator.ProcessStructlog(sl, i, frameID, framePath, gasUsed[i], gasSelf[i], callToAddr, prevStructlog) + + // Check for EOA call: CALL-type opcode where depth stays the same (immediate return) + // and target is not a precompile + if isCallOpcode(sl.Op) && callToAddr != nil { + if i+1 < len(trace.Structlogs) { + nextDepth := trace.Structlogs[i+1].Depth + if nextDepth == sl.Depth && !isPrecompile(*callToAddr) { + // Emit synthetic EOA frame + eoaFrameID, eoaFramePath := callTracker.issueFrameID() + aggregator.ProcessStructlog(&execution.StructLog{ + Op: "", + Depth: sl.Depth + 1, + }, i, eoaFrameID, eoaFramePath, 0, 0, callToAddr, sl) + } + } + } + + prevStructlog = sl + } + + // Set root frame's target address from transaction's to_address + // (root frame has no initiating CALL, so target comes from tx.To()) + if tx.To() != nil { + addr := tx.To().Hex() + aggregator.SetRootTargetAddress(&addr) + } + + // Get receipt gas for intrinsic gas calculation + // For now, we use trace.Gas as a proxy (TODO: get actual receipt gas from block receipts) + receiptGas := trace.Gas + + // Finalize aggregation and get call frame rows + frames := aggregator.Finalize(trace, receiptGas) + + if len(frames) == 0 { + return 0, nil + } + + // Insert call frames to ClickHouse + if err := p.insertCallFrames(ctx, frames, block.Number().Uint64(), tx.Hash().String(), uint32(index), time.Now()); err != nil { //nolint:gosec // 
index is bounded by block.Transactions() length + pcommon.TransactionsProcessed.WithLabelValues(p.network.Name, "structlog_agg", "failed").Inc() + + return 0, fmt.Errorf("failed to insert call frames: %w", err) + } + + // Record success metrics + pcommon.TransactionsProcessed.WithLabelValues(p.network.Name, "structlog_agg", "success").Inc() + pcommon.ClickHouseInsertsRows.WithLabelValues(p.network.Name, ProcessorName, p.config.Table, "success", "").Add(float64(len(frames))) + + // Log progress for transactions with many frames + if len(frames) > 100 { + p.log.WithFields(logrus.Fields{ + "tx_hash": tx.Hash().String(), + "frame_count": len(frames), + }).Debug("Processed transaction with many call frames") + } + + return len(frames), nil +} + +// getTransactionTrace gets the trace for a transaction. +func (p *Processor) getTransactionTrace(ctx context.Context, tx execution.Transaction, block execution.Block) (*execution.TraceTransaction, error) { + // Get execution node + node := p.pool.GetHealthyExecutionNode() + if node == nil { + return nil, fmt.Errorf("no healthy execution node available") + } + + // Process transaction with timeout + processCtx, cancel := context.WithTimeout(ctx, tracker.DefaultTraceTimeout) + defer cancel() + + // Get transaction trace + trace, err := node.DebugTraceTransaction(processCtx, tx.Hash().String(), block.Number(), execution.StackTraceOptions()) + if err != nil { + return nil, fmt.Errorf("failed to trace transaction: %w", err) + } + + return trace, nil +} + +// extractCallAddressWithCreate extracts the call address, using createAddresses map for CREATE/CREATE2 opcodes. +func (p *Processor) extractCallAddressWithCreate(structLog *execution.StructLog, index int, createAddresses map[int]*string) *string { + // For CREATE/CREATE2, use the pre-computed address from the trace + if structLog.Op == opcodeCREATE || structLog.Op == opcodeCREATE2 { + if createAddresses != nil { + return createAddresses[index] + } + + return nil + } + + return p.extractCallAddress(structLog) +} + +// extractCallAddress extracts the call address from a structlog for CALL-family opcodes. +func (p *Processor) extractCallAddress(structLog *execution.StructLog) *string { + // Embedded mode: use pre-extracted CallToAddress + if structLog.CallToAddress != nil { + return structLog.CallToAddress + } + + // RPC mode fallback: extract from Stack for CALL-family opcodes + if structLog.Stack == nil || len(*structLog.Stack) < 2 { + return nil + } + + switch structLog.Op { + case opcodeCALL, opcodeCALLCODE, opcodeDELEGATECALL, opcodeSTATICCALL: + stackValue := (*structLog.Stack)[len(*structLog.Stack)-2] + addr := formatAddress(stackValue) + + return &addr + default: + return nil + } +} + +// formatAddress normalizes an address to exactly 42 characters (0x + 40 hex). +func formatAddress(addr string) string { + hex := strings.TrimPrefix(addr, "0x") + + if len(hex) > 40 { + hex = hex[len(hex)-40:] + } + + return fmt.Sprintf("0x%040s", hex) +} + +// isCallOpcode returns true if the opcode initiates a call that creates a child frame. +func isCallOpcode(op string) bool { + switch op { + case opcodeCALL, opcodeCALLCODE, opcodeDELEGATECALL, opcodeSTATICCALL: + return true + default: + return false + } +} + +// precompileAddresses contains all known EVM precompile addresses. 
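+//
+// Keys are lower-case, zero-padded 20-byte addresses; isPrecompile normalises
+// its input the same way before the lookup, e.g. (illustrative inputs):
+//
+//	isPrecompile("0x1")                                          // true  (ecrecover)
+//	isPrecompile("0x0000000000000000000000000000000000000100")   // true  (p256Verify)
+//	isPrecompile("0x000000000000000000000000000000000000dEaD")   // false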
+var precompileAddresses = map[string]bool{ + "0x0000000000000000000000000000000000000001": true, // ecrecover + "0x0000000000000000000000000000000000000002": true, // sha256 + "0x0000000000000000000000000000000000000003": true, // ripemd160 + "0x0000000000000000000000000000000000000004": true, // identity (dataCopy) + "0x0000000000000000000000000000000000000005": true, // modexp (bigModExp) + "0x0000000000000000000000000000000000000006": true, // bn256Add (ecAdd) + "0x0000000000000000000000000000000000000007": true, // bn256ScalarMul (ecMul) + "0x0000000000000000000000000000000000000008": true, // bn256Pairing (ecPairing) + "0x0000000000000000000000000000000000000009": true, // blake2f + "0x000000000000000000000000000000000000000a": true, // kzgPointEvaluation (EIP-4844, Cancun) + "0x000000000000000000000000000000000000000b": true, // bls12381G1Add (EIP-2537, Osaka) + "0x000000000000000000000000000000000000000c": true, // bls12381G1MultiExp (EIP-2537, Osaka) + "0x000000000000000000000000000000000000000d": true, // bls12381G2Add (EIP-2537, Osaka) + "0x000000000000000000000000000000000000000e": true, // bls12381G2MultiExp (EIP-2537, Osaka) + "0x000000000000000000000000000000000000000f": true, // bls12381Pairing (EIP-2537, Osaka) + "0x0000000000000000000000000000000000000010": true, // bls12381MapG1 (EIP-2537, Osaka) + "0x0000000000000000000000000000000000000011": true, // bls12381MapG2 (EIP-2537, Osaka) + "0x0000000000000000000000000000000000000100": true, // p256Verify (EIP-7212, Osaka) +} + +// isPrecompile returns true if the address is a known EVM precompile. +func isPrecompile(addr string) bool { + hex := strings.TrimPrefix(strings.ToLower(addr), "0x") + + for len(hex) < 40 { + hex = "0" + hex + } + + return precompileAddresses["0x"+hex] +} + +// hasPrecomputedGasUsed detects whether GasUsed values are pre-computed by the tracer. +func hasPrecomputedGasUsed(structlogs []execution.StructLog) bool { + if len(structlogs) == 0 { + return false + } + + return structlogs[0].GasUsed > 0 +} + +// hasPrecomputedCreateAddresses detects whether CREATE/CREATE2 addresses are pre-computed. +func hasPrecomputedCreateAddresses(structlogs []execution.StructLog) bool { + for i := range structlogs { + op := structlogs[i].Op + if op == opcodeCREATE || op == opcodeCREATE2 { + return structlogs[i].CallToAddress != nil + } + } + + return false +} + +// computeGasUsed calculates the actual gas consumed for each structlog. 
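+//
+// Gas used by an opcode is the difference between its remaining gas and the
+// remaining gas of the next opcode observed at the same call depth; the last
+// opcode seen at each depth keeps its reported GasCost. A worked example with
+// illustrative gas values:
+//
+//	#  op      depth  gas      gasUsed
+//	0  PUSH1   1      100000   3        (100000 - 99997)
+//	1  CALL    1      99997    50000    (99997 - 49997, includes the child frame)
+//	2  PUSH1   2      90000    3        (90000 - 89997)
+//	3  RETURN  2      89997    GasCost  (last opcode at depth 2)
+//	4  POP     1      49997    GasCost  (last opcode at depth 1)
+//
+// The caller later applies structlog.ComputeGasSelf to exclude child-frame
+// gas from CALL/CREATE entries.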
+func computeGasUsed(structlogs []execution.StructLog) []uint64 { + if len(structlogs) == 0 { + return nil + } + + gasUsed := make([]uint64, len(structlogs)) + + // Initialize all with pre-calculated cost (fallback) + for i := range structlogs { + gasUsed[i] = structlogs[i].GasCost + } + + // pendingIdx[depth] = index into structlogs for the pending opcode at that depth + pendingIdx := make([]int, 0, 16) + + for i := range structlogs { + depth := int(structlogs[i].Depth) //nolint:gosec // EVM depth is capped at 1024 + + for len(pendingIdx) <= depth { + pendingIdx = append(pendingIdx, -1) + } + + // Clear pending indices from deeper levels + for d := len(pendingIdx) - 1; d > depth; d-- { + pendingIdx[d] = -1 + } + + // Update gasUsed for pending log at current depth + if prevIdx := pendingIdx[depth]; prevIdx >= 0 && prevIdx < len(structlogs) { + // Guard against underflow: if gas values are corrupted or out of order, + // fall back to the pre-calculated GasCost instead of underflowing + if structlogs[prevIdx].Gas >= structlogs[i].Gas { + gasUsed[prevIdx] = structlogs[prevIdx].Gas - structlogs[i].Gas + } + // else: keep the fallback GasCost value set during initialization + } + + pendingIdx[depth] = i + } + + return gasUsed +} + +// computeCreateAddresses pre-computes the created contract addresses for all CREATE/CREATE2 opcodes. +func computeCreateAddresses(structlogs []execution.StructLog) map[int]*string { + result := make(map[int]*string) + + type pendingCreate struct { + index int + depth uint64 + } + + var pending []pendingCreate + + for i, log := range structlogs { + // Resolve pending CREATEs that have completed + for len(pending) > 0 { + last := pending[len(pending)-1] + + if log.Depth <= last.depth && i > last.index { + if log.Stack != nil && len(*log.Stack) > 0 { + addr := formatAddress((*log.Stack)[len(*log.Stack)-1]) + result[last.index] = &addr + } + + pending = pending[:len(pending)-1] + } else { + break + } + } + + if log.Op == opcodeCREATE || log.Op == opcodeCREATE2 { + pending = append(pending, pendingCreate{index: i, depth: log.Depth}) + } + } + + return result +} + +// CallTracker tracks call frames during EVM opcode traversal. 
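+//
+// Frame IDs are assigned in discovery order and each path records the chain
+// of frame IDs from the root. For an illustrative depth sequence the tracker
+// yields:
+//
+//	depth:  1    2      2      1    2      1
+//	frame:  0    1      1      0    2      0
+//	path:   [0]  [0 1]  [0 1]  [0]  [0 2]  [0]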
+type callTracker struct { + stack []callFrame + nextID uint32 + path []uint32 +} + +type callFrame struct { + id uint32 + depth uint64 +} + +func newCallTracker() *callTracker { + return &callTracker{ + stack: []callFrame{{id: 0, depth: 1}}, + nextID: 1, + path: []uint32{0}, + } +} + +func (ct *callTracker) processDepthChange(newDepth uint64) (frameID uint32, framePath []uint32) { + currentDepth := ct.stack[len(ct.stack)-1].depth + + if newDepth > currentDepth { + newFrame := callFrame{id: ct.nextID, depth: newDepth} + ct.stack = append(ct.stack, newFrame) + ct.path = append(ct.path, ct.nextID) + ct.nextID++ + } else if newDepth < currentDepth { + for len(ct.stack) > 1 && ct.stack[len(ct.stack)-1].depth > newDepth { + ct.stack = ct.stack[:len(ct.stack)-1] + ct.path = ct.path[:len(ct.path)-1] + } + } + + pathCopy := make([]uint32, len(ct.path)) + copy(pathCopy, ct.path) + + return ct.stack[len(ct.stack)-1].id, pathCopy +} + +func (ct *callTracker) issueFrameID() (frameID uint32, framePath []uint32) { + newID := ct.nextID + ct.nextID++ + + pathCopy := make([]uint32, len(ct.path)+1) + copy(pathCopy, ct.path) + pathCopy[len(ct.path)] = newID + + return newID, pathCopy +} diff --git a/pkg/rowbuffer/buffer.go b/pkg/rowbuffer/buffer.go new file mode 100644 index 0000000..c38d195 --- /dev/null +++ b/pkg/rowbuffer/buffer.go @@ -0,0 +1,340 @@ +// Package rowbuffer provides thread-safe row batching for ClickHouse inserts. +// It pools rows in memory across concurrent tasks and flushes when hitting +// a row limit or timer interval. +package rowbuffer + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/sirupsen/logrus" + + "github.com/ethpandaops/execution-processor/pkg/common" +) + +// FlushFunc is the function called to flush rows to the database. +type FlushFunc[R any] func(ctx context.Context, rows []R) error + +// Config holds configuration for the row buffer. +type Config struct { + MaxRows int // Flush threshold (default: 100000) + FlushInterval time.Duration // Max wait before flush (default: 1s) + Network string // For metrics + Processor string // For metrics + Table string // For metrics +} + +// waiter represents a task waiting for its rows to be flushed. +type waiter struct { + resultCh chan<- error + rowCount int +} + +// Buffer provides thread-safe row batching for ClickHouse inserts. +type Buffer[R any] struct { + mu sync.Mutex + rows []R + waiters []waiter + + config Config + flushFn FlushFunc[R] + log logrus.FieldLogger + + stopChan chan struct{} + stoppedChan chan struct{} + wg sync.WaitGroup + started bool +} + +// New creates a new Buffer with the given configuration and flush function. +func New[R any](cfg Config, flushFn FlushFunc[R], log logrus.FieldLogger) *Buffer[R] { + // Set defaults + if cfg.MaxRows <= 0 { + cfg.MaxRows = 100000 + } + + if cfg.FlushInterval <= 0 { + cfg.FlushInterval = time.Second + } + + return &Buffer[R]{ + rows: make([]R, 0, cfg.MaxRows), + waiters: make([]waiter, 0, 64), + config: cfg, + flushFn: flushFn, + log: log.WithField("component", "rowbuffer"), + stopChan: make(chan struct{}), + stoppedChan: make(chan struct{}), + } +} + +// Start starts the flush timer goroutine. 
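+//
+// A minimal usage sketch of the buffer; the row type, the flush body
+// (insertIntoClickHouse) and the log value are placeholders, not part of
+// this package:
+//
+//	buf := New(Config{MaxRows: 1000, FlushInterval: time.Second},
+//		func(ctx context.Context, rows []MyRow) error {
+//			return insertIntoClickHouse(ctx, rows) // hypothetical sink
+//		}, log)
+//
+//	if err := buf.Start(ctx); err != nil {
+//		return err
+//	}
+//	defer func() { _ = buf.Stop(ctx) }()
+//
+//	// Submit blocks until the batch containing these rows has been
+//	// flushed, or the context is cancelled.
+//	if err := buf.Submit(ctx, rows); err != nil {
+//		return err
+//	}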
+func (b *Buffer[R]) Start(ctx context.Context) error { + b.mu.Lock() + + if b.started { + b.mu.Unlock() + + return nil + } + + b.started = true + b.mu.Unlock() + + // Go 1.25: cleaner goroutine spawning with WaitGroup.Go + b.wg.Go(func() { b.runFlushTimer(ctx) }) + + b.log.WithFields(logrus.Fields{ + "max_rows": b.config.MaxRows, + "flush_interval": b.config.FlushInterval, + }).Debug("Row buffer started") + + return nil +} + +// Stop stops the buffer, flushing any remaining rows. +func (b *Buffer[R]) Stop(ctx context.Context) error { + b.mu.Lock() + + if !b.started { + b.mu.Unlock() + + return nil + } + + b.started = false + b.mu.Unlock() + + // Signal the flush timer to stop + close(b.stopChan) + + // Wait for the flush timer to exit + b.wg.Wait() + + // Flush any remaining rows + b.mu.Lock() + + if len(b.rows) > 0 { + rows := b.rows + waiters := b.waiters + b.rows = make([]R, 0, b.config.MaxRows) + b.waiters = make([]waiter, 0, 64) + b.mu.Unlock() + + err := b.doFlush(ctx, rows, waiters, "shutdown") + if err != nil { + b.log.WithError(err).Error("Failed to flush remaining rows on shutdown") + + return fmt.Errorf("failed to flush remaining rows: %w", err) + } + } else { + b.mu.Unlock() + } + + close(b.stoppedChan) + b.log.Debug("Row buffer stopped") + + return nil +} + +// Submit adds rows to the buffer and blocks until they are successfully flushed. +// Returns an error if the flush fails or the context is cancelled. +func (b *Buffer[R]) Submit(ctx context.Context, rows []R) error { + if len(rows) == 0 { + return nil + } + + // Create result channel + resultCh := make(chan error, 1) + + b.mu.Lock() + + // Check if buffer is stopped + if !b.started { + b.mu.Unlock() + + return fmt.Errorf("buffer is not started") + } + + // Add rows and waiter + b.rows = append(b.rows, rows...) + b.waiters = append(b.waiters, waiter{resultCh: resultCh, rowCount: len(rows)}) + + // Update pending metrics + common.RowBufferPendingRows.WithLabelValues( + b.config.Network, b.config.Processor, b.config.Table, + ).Set(float64(len(b.rows))) + + common.RowBufferPendingTasks.WithLabelValues( + b.config.Network, b.config.Processor, b.config.Table, + ).Set(float64(len(b.waiters))) + + // Check if we should flush (size trigger) + shouldFlush := len(b.rows) >= b.config.MaxRows + + var flushRows []R + + var flushWaiters []waiter + + if shouldFlush { + flushRows = b.rows + flushWaiters = b.waiters + b.rows = make([]R, 0, b.config.MaxRows) + b.waiters = make([]waiter, 0, 64) + } + + b.mu.Unlock() + + // Perform flush outside of lock if triggered by size + if shouldFlush { + go func() { + _ = b.doFlush(context.Background(), flushRows, flushWaiters, "size") + }() + } + + // Wait for result or context cancellation + select { + case err := <-resultCh: + return err + case <-ctx.Done(): + return ctx.Err() + case <-b.stopChan: + // Buffer is stopping, wait for flush result + select { + case err := <-resultCh: + return err + case <-ctx.Done(): + return ctx.Err() + } + } +} + +// runFlushTimer runs the periodic flush timer. +func (b *Buffer[R]) runFlushTimer(ctx context.Context) { + ticker := time.NewTicker(b.config.FlushInterval) + defer ticker.Stop() + + for { + select { + case <-b.stopChan: + return + case <-ctx.Done(): + return + case <-ticker.C: + b.flushOnTimer(ctx) + } + } +} + +// flushOnTimer attempts to flush buffered rows on timer trigger. 
+func (b *Buffer[R]) flushOnTimer(ctx context.Context) { + b.mu.Lock() + + if len(b.rows) == 0 { + b.mu.Unlock() + + return + } + + rows := b.rows + waiters := b.waiters + b.rows = make([]R, 0, b.config.MaxRows) + b.waiters = make([]waiter, 0, 64) + b.mu.Unlock() + + _ = b.doFlush(ctx, rows, waiters, "timer") +} + +// doFlush performs the actual flush and notifies all waiters. +func (b *Buffer[R]) doFlush(ctx context.Context, rows []R, waiters []waiter, trigger string) error { + if len(rows) == 0 { + return nil + } + + start := time.Now() + rowCount := len(rows) + + // Call the flush function + err := b.flushFn(ctx, rows) + + duration := time.Since(start) + + // Record metrics + status := "success" + if err != nil { + status = "failed" + } + + common.RowBufferFlushTotal.WithLabelValues( + b.config.Network, b.config.Processor, b.config.Table, trigger, status, + ).Inc() + + common.RowBufferFlushDuration.WithLabelValues( + b.config.Network, b.config.Processor, b.config.Table, + ).Observe(duration.Seconds()) + + common.RowBufferFlushSize.WithLabelValues( + b.config.Network, b.config.Processor, b.config.Table, + ).Observe(float64(rowCount)) + + // Update pending metrics (now zero after flush) + b.mu.Lock() + common.RowBufferPendingRows.WithLabelValues( + b.config.Network, b.config.Processor, b.config.Table, + ).Set(float64(len(b.rows))) + + common.RowBufferPendingTasks.WithLabelValues( + b.config.Network, b.config.Processor, b.config.Table, + ).Set(float64(len(b.waiters))) + b.mu.Unlock() + + // Log the flush + if err != nil { + b.log.WithError(err).WithFields(logrus.Fields{ + "rows": rowCount, + "waiters": len(waiters), + "trigger": trigger, + "duration": duration, + "processor": b.config.Processor, + "table": b.config.Table, + }).Error("ClickHouse flush failed") + } else { + b.log.WithFields(logrus.Fields{ + "rows": rowCount, + "waiters": len(waiters), + "trigger": trigger, + "duration": duration, + "processor": b.config.Processor, + "table": b.config.Table, + }).Debug("ClickHouse flush completed") + } + + // Notify all waiters + for _, w := range waiters { + select { + case w.resultCh <- err: + default: + // Waiter may have timed out, skip + } + } + + return err +} + +// Len returns the current number of buffered rows. +func (b *Buffer[R]) Len() int { + b.mu.Lock() + defer b.mu.Unlock() + + return len(b.rows) +} + +// WaiterCount returns the current number of waiting tasks. 
+func (b *Buffer[R]) WaiterCount() int { + b.mu.Lock() + defer b.mu.Unlock() + + return len(b.waiters) +} diff --git a/pkg/rowbuffer/buffer_test.go b/pkg/rowbuffer/buffer_test.go new file mode 100644 index 0000000..dbdbccc --- /dev/null +++ b/pkg/rowbuffer/buffer_test.go @@ -0,0 +1,384 @@ +package rowbuffer + +import ( + "context" + "errors" + "sync" + "sync/atomic" + "testing" + "testing/synctest" + "time" + + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func newTestLogger() logrus.FieldLogger { + log := logrus.New() + log.SetLevel(logrus.DebugLevel) + + return log +} + +func TestBuffer_FlushOnRowLimit(t *testing.T) { + synctest.Test(t, func(t *testing.T) { + flushCalled := make(chan []int, 1) + buf := New(Config{MaxRows: 10, FlushInterval: time.Hour}, + func(ctx context.Context, rows []int) error { + flushCalled <- rows + + return nil + }, newTestLogger()) + + require.NoError(t, buf.Start(context.Background())) + + defer func() { _ = buf.Stop(context.Background()) }() + + // Submit 10 rows - should trigger flush + go func() { + _ = buf.Submit(context.Background(), make([]int, 10)) + }() + + synctest.Wait() + + select { + case rows := <-flushCalled: + assert.Len(t, rows, 10) + default: + t.Fatal("flush not called") + } + }) +} + +func TestBuffer_FlushOnTimer(t *testing.T) { + synctest.Test(t, func(t *testing.T) { + flushCalled := make(chan []int, 1) + buf := New(Config{MaxRows: 1000, FlushInterval: time.Second}, + func(ctx context.Context, rows []int) error { + flushCalled <- rows + + return nil + }, newTestLogger()) + + require.NoError(t, buf.Start(context.Background())) + + defer func() { _ = buf.Stop(context.Background()) }() + + // Submit 5 rows (below threshold) + go func() { + _ = buf.Submit(context.Background(), make([]int, 5)) + }() + + synctest.Wait() + + // Advance fake clock by 1 second + time.Sleep(time.Second) + + synctest.Wait() + + select { + case rows := <-flushCalled: + assert.Len(t, rows, 5) + default: + t.Fatal("timer flush not triggered") + } + }) +} + +func TestBuffer_ConcurrentSubmissions(t *testing.T) { + synctest.Test(t, func(t *testing.T) { + var totalRows atomic.Int64 + + buf := New(Config{MaxRows: 100, FlushInterval: time.Second}, + func(ctx context.Context, rows []int) error { + totalRows.Add(int64(len(rows))) + + return nil + }, newTestLogger()) + + require.NoError(t, buf.Start(context.Background())) + + // 50 goroutines each submitting 10 rows + for range 50 { + go func() { + _ = buf.Submit(context.Background(), make([]int, 10)) + }() + } + + synctest.Wait() + + // Advance time to trigger timer flush for any remaining rows + time.Sleep(time.Second) + + synctest.Wait() + + require.NoError(t, buf.Stop(context.Background())) + assert.Equal(t, int64(500), totalRows.Load()) + }) +} + +func TestBuffer_ErrorPropagation(t *testing.T) { + synctest.Test(t, func(t *testing.T) { + expectedErr := errors.New("clickhouse error") + buf := New(Config{MaxRows: 10, FlushInterval: time.Hour}, + func(ctx context.Context, rows []int) error { + return expectedErr + }, newTestLogger()) + + require.NoError(t, buf.Start(context.Background())) + + defer func() { _ = buf.Stop(context.Background()) }() + + errChan := make(chan error, 3) + + // 3 goroutines submit, triggering flush + for range 3 { + go func() { + errChan <- buf.Submit(context.Background(), make([]int, 4)) + }() + } + + synctest.Wait() + + // All 3 should receive the error (2 submits trigger flush at 10 rows) + receivedCount := 0 + + for range 3 { + 
select { + case err := <-errChan: + require.ErrorIs(t, err, expectedErr) + + receivedCount++ + default: + // Some may still be pending + } + } + + // At least 2 should have received errors (the flush was triggered) + require.GreaterOrEqual(t, receivedCount, 2) + }) +} + +func TestBuffer_GracefulShutdown(t *testing.T) { + flushCalled := make(chan []int, 1) + buf := New(Config{MaxRows: 1000, FlushInterval: time.Hour}, + func(ctx context.Context, rows []int) error { + flushCalled <- rows + + return nil + }, newTestLogger()) + + require.NoError(t, buf.Start(context.Background())) + + // Submit rows below threshold in a goroutine + var wg sync.WaitGroup + + wg.Add(1) + + go func() { + defer wg.Done() + + _ = buf.Submit(context.Background(), make([]int, 50)) + }() + + // Give time for submit to start + time.Sleep(10 * time.Millisecond) + + // Stop should flush remaining rows + require.NoError(t, buf.Stop(context.Background())) + + // Wait for submit goroutine to finish + wg.Wait() + + select { + case rows := <-flushCalled: + assert.Len(t, rows, 50) + default: + t.Fatal("shutdown flush not called") + } +} + +func TestBuffer_ContextCancellation(t *testing.T) { + synctest.Test(t, func(t *testing.T) { + buf := New(Config{MaxRows: 1000, FlushInterval: time.Hour}, + func(ctx context.Context, rows []int) error { + // Simulate slow flush + time.Sleep(10 * time.Second) + + return nil + }, newTestLogger()) + + require.NoError(t, buf.Start(context.Background())) + + defer func() { _ = buf.Stop(context.Background()) }() + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond) + defer cancel() + + errChan := make(chan error, 1) + + go func() { + errChan <- buf.Submit(ctx, make([]int, 5)) + }() + + synctest.Wait() + + // Advance time past the timeout + time.Sleep(20 * time.Millisecond) + + synctest.Wait() + + select { + case err := <-errChan: + require.ErrorIs(t, err, context.DeadlineExceeded) + default: + t.Fatal("expected context deadline error") + } + }) +} + +func TestBuffer_EmptySubmit(t *testing.T) { + buf := New(Config{MaxRows: 10, FlushInterval: time.Hour}, + func(ctx context.Context, rows []int) error { + t.Fatal("flush should not be called for empty submit") + + return nil + }, newTestLogger()) + + require.NoError(t, buf.Start(context.Background())) + + defer func() { _ = buf.Stop(context.Background()) }() + + // Empty submit should return immediately without error + err := buf.Submit(context.Background(), []int{}) + require.NoError(t, err) +} + +func TestBuffer_NotStarted(t *testing.T) { + buf := New(Config{MaxRows: 10, FlushInterval: time.Hour}, + func(ctx context.Context, rows []int) error { + return nil + }, newTestLogger()) + + // Submit without starting should fail + err := buf.Submit(context.Background(), []int{1, 2, 3}) + require.Error(t, err) + assert.Contains(t, err.Error(), "not started") +} + +func TestBuffer_LenAndWaiterCount(t *testing.T) { + synctest.Test(t, func(t *testing.T) { + blockFlush := make(chan struct{}) + buf := New(Config{MaxRows: 1000, FlushInterval: time.Hour}, + func(ctx context.Context, rows []int) error { + <-blockFlush + + return nil + }, newTestLogger()) + + require.NoError(t, buf.Start(context.Background())) + + defer func() { + close(blockFlush) + + _ = buf.Stop(context.Background()) + }() + + // Initially empty + assert.Equal(t, 0, buf.Len()) + assert.Equal(t, 0, buf.WaiterCount()) + + // Submit some rows + go func() { + _ = buf.Submit(context.Background(), make([]int, 5)) + }() + + synctest.Wait() + + // Should have rows and waiters 
pending + assert.Equal(t, 5, buf.Len()) + assert.Equal(t, 1, buf.WaiterCount()) + }) +} + +func TestBuffer_FlushTriggers(t *testing.T) { + tests := []struct { + name string + maxRows int + flushInterval time.Duration + submitRows int + waitTime time.Duration + expectFlush bool + }{ + {"size trigger", 10, time.Hour, 10, 0, true}, + {"size trigger partial", 10, time.Hour, 5, 0, false}, + {"timer trigger", 1000, 100 * time.Millisecond, 5, 150 * time.Millisecond, true}, + {"neither trigger", 1000, time.Hour, 5, 0, false}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + synctest.Test(t, func(t *testing.T) { + flushed := atomic.Bool{} + buf := New(Config{MaxRows: tc.maxRows, FlushInterval: tc.flushInterval}, + func(ctx context.Context, rows []int) error { + flushed.Store(true) + + return nil + }, newTestLogger()) + + require.NoError(t, buf.Start(context.Background())) + + defer func() { _ = buf.Stop(context.Background()) }() + + go func() { + _ = buf.Submit(context.Background(), make([]int, tc.submitRows)) + }() + + synctest.Wait() + + if tc.waitTime > 0 { + time.Sleep(tc.waitTime) + + synctest.Wait() + } + + assert.Equal(t, tc.expectFlush, flushed.Load()) + }) + }) + } +} + +func TestBuffer_MultipleFlushes(t *testing.T) { + synctest.Test(t, func(t *testing.T) { + flushCount := atomic.Int32{} + + var totalRows atomic.Int64 + + buf := New(Config{MaxRows: 10, FlushInterval: time.Hour}, + func(ctx context.Context, rows []int) error { + flushCount.Add(1) + totalRows.Add(int64(len(rows))) + + return nil + }, newTestLogger()) + + require.NoError(t, buf.Start(context.Background())) + + // Submit 25 rows - should trigger 2 flushes (10+10), leaving 5 + for range 25 { + go func() { + _ = buf.Submit(context.Background(), make([]int, 1)) + }() + } + + synctest.Wait() + + require.NoError(t, buf.Stop(context.Background())) + + // Should have flushed all 25 rows across multiple flushes + assert.Equal(t, int64(25), totalRows.Load()) + assert.GreaterOrEqual(t, flushCount.Load(), int32(2)) + }) +} diff --git a/pkg/state/manager.go b/pkg/state/manager.go index e7e99e2..9181c02 100644 --- a/pkg/state/manager.go +++ b/pkg/state/manager.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "math/big" + "sync" "time" "github.com/ethpandaops/execution-processor/pkg/clickhouse" @@ -12,6 +13,10 @@ import ( "github.com/sirupsen/logrus" ) +const ( + limiterCacheRefreshInterval = 6 * time.Second +) + // Sentinel errors. 
var ( ErrNoMoreBlocks = errors.New("no more blocks to process") @@ -25,6 +30,12 @@ type Manager struct { limiterTable string limiterEnabled bool network string + + // Limiter cache fields + limiterCacheMu sync.RWMutex + limiterCacheValue map[string]*big.Int // network -> max block + limiterCacheStop chan struct{} + limiterCacheStarted bool } func NewManager(log logrus.FieldLogger, config *Config) (*Manager, error) { @@ -84,6 +95,9 @@ func (s *Manager) Start(ctx context.Context) error { if err := s.startClientWithRetry(ctx, s.limiterClient, "limiter"); err != nil { return fmt.Errorf("failed to start limiter client: %w", err) } + + // Start the limiter cache refresh goroutine + s.startLimiterCacheRefresh() } return nil @@ -135,6 +149,16 @@ func (s *Manager) startClientWithRetry(ctx context.Context, client clickhouse.Cl func (s *Manager) Stop(ctx context.Context) error { var err error + // Stop the limiter cache refresh goroutine + s.limiterCacheMu.Lock() + + if s.limiterCacheStarted && s.limiterCacheStop != nil { + close(s.limiterCacheStop) + s.limiterCacheStarted = false + } + + s.limiterCacheMu.Unlock() + if stopErr := s.storageClient.Stop(); stopErr != nil { err = fmt.Errorf("failed to stop storage client: %w", stopErr) } @@ -253,6 +277,64 @@ func (s *Manager) NextBlock(ctx context.Context, processor, network, mode string return progressiveNext, nil } +// NextBlocks returns up to `count` sequential block numbers starting from what NextBlock returns. +// This is used for batch block fetching to get multiple consecutive blocks at once. +func (s *Manager) NextBlocks( + ctx context.Context, + processor, network, mode string, + chainHead *big.Int, + count int, +) ([]*big.Int, error) { + if count <= 0 { + return []*big.Int{}, nil + } + + // Get the first block number + firstBlock, err := s.NextBlock(ctx, processor, network, mode, chainHead) + if err != nil { + return nil, err + } + + if firstBlock == nil { + return []*big.Int{}, nil + } + + // Generate sequential block numbers + blocks := make([]*big.Int, 0, count) + blocks = append(blocks, firstBlock) + + for i := 1; i < count; i++ { + var nextBlock *big.Int + if mode == tracker.BACKWARDS_MODE { + // Backwards mode: decrement block numbers + nextBlock = new(big.Int).Sub(firstBlock, big.NewInt(int64(i))) + // Don't go below 0 + if nextBlock.Sign() < 0 { + break + } + } else { + // Forwards mode: increment block numbers + nextBlock = new(big.Int).Add(firstBlock, big.NewInt(int64(i))) + // Don't exceed chain head if provided + if chainHead != nil && nextBlock.Cmp(chainHead) > 0 { + break + } + } + + blocks = append(blocks, nextBlock) + } + + s.log.WithFields(logrus.Fields{ + "processor": processor, + "network": network, + "mode": mode, + "first_block": firstBlock.String(), + "count": len(blocks), + }).Debug("Generated batch of block numbers") + + return blocks, nil +} + func (s *Manager) getProgressiveNextBlock(ctx context.Context, processor, network string, chainHead *big.Int) (*big.Int, bool, error) { query := fmt.Sprintf(` SELECT block_number @@ -393,45 +475,32 @@ func (s *Manager) getProgressiveNextBlockBackwards(ctx context.Context, processo } func (s *Manager) getLimiterMaxBlock(ctx context.Context, network string) (*big.Int, error) { - query := fmt.Sprintf(` - SELECT max(execution_payload_block_number) AS block_number - FROM %s FINAL - WHERE meta_network_name = '%s' - `, s.limiterTable, network) - - s.log.WithFields(logrus.Fields{ - "network": network, - "table": s.limiterTable, - }).Debug("Querying for maximum execution payload block 
number") - - blockNumber, err := s.limiterClient.QueryUInt64(ctx, query, "block_number") - if err != nil { - return nil, fmt.Errorf("failed to get max execution payload block from %s: %w", s.limiterTable, err) - } + // Try to get from cache first + s.limiterCacheMu.RLock() + cachedValue, ok := s.limiterCacheValue[network] + s.limiterCacheMu.RUnlock() - // Check if we got a result - if blockNumber == nil { - // No blocks in limiter table, return genesis + if ok && cachedValue != nil { s.log.WithFields(logrus.Fields{ - "network": network, - }).Debug("No blocks found in limiter table, returning genesis block as max") + "network": network, + "limiter_max": cachedValue.String(), + }).Debug("Returning limiter max block from cache") - return big.NewInt(0), nil + return cachedValue, nil } - maxBlock := new(big.Int).SetUint64(*blockNumber) + // Cache miss - query directly (this should be rare after startup) s.log.WithFields(logrus.Fields{ - "network": network, - "limiter_max": maxBlock.String(), - }).Debug("Found maximum execution payload block number") + "network": network, + }).Debug("Limiter cache miss, querying directly") - return maxBlock, nil + return s.queryLimiterMaxBlock(ctx, network) } func (s *Manager) MarkBlockProcessed(ctx context.Context, blockNumber uint64, network, processor string) error { // Insert using direct string substitution for table name // Table name is validated during config initialization - query := fmt.Sprintf("INSERT INTO %s (updated_date_time, block_number, processor, meta_network_name) VALUES ('%s', %d, '%s', '%s')", s.storageTable, time.Now().Format("2006-01-02 15:04:05"), blockNumber, processor, network) + query := fmt.Sprintf("INSERT INTO %s (updated_date_time, block_number, processor, meta_network_name) VALUES ('%s', %d, '%s', '%s')", s.storageTable, time.Now().Format("2006-01-02 15:04:05.000"), blockNumber, processor, network) err := s.storageClient.Execute(ctx, query) if err != nil { @@ -453,7 +522,7 @@ func (s *Manager) MarkBlockEnqueued(ctx context.Context, blockNumber uint64, tas query := fmt.Sprintf( "INSERT INTO %s (updated_date_time, block_number, processor, meta_network_name, complete, task_count) VALUES ('%s', %d, '%s', '%s', 0, %d)", s.storageTable, - time.Now().Format("2006-01-02 15:04:05"), + time.Now().Format("2006-01-02 15:04:05.000"), blockNumber, processor, network, @@ -482,7 +551,7 @@ func (s *Manager) MarkBlockComplete(ctx context.Context, blockNumber uint64, net query := fmt.Sprintf( "INSERT INTO %s (updated_date_time, block_number, processor, meta_network_name, complete, task_count) VALUES ('%s', %d, '%s', '%s', 1, 0)", s.storageTable, - time.Now().Format("2006-01-02 15:04:05"), + time.Now().Format("2006-01-02 15:04:05.000"), blockNumber, processor, network, @@ -756,3 +825,96 @@ func (s *Manager) GetHeadDistance(ctx context.Context, processor, network, mode return distance, headType, nil } + +// startLimiterCacheRefresh starts the background goroutine for limiter cache refresh. +// It ensures only one refresh goroutine runs even if called multiple times. +func (s *Manager) startLimiterCacheRefresh() { + s.limiterCacheMu.Lock() + + if s.limiterCacheStarted { + s.limiterCacheMu.Unlock() + + return + } + + s.limiterCacheStarted = true + s.limiterCacheValue = make(map[string]*big.Int, 1) + s.limiterCacheStop = make(chan struct{}) + s.limiterCacheMu.Unlock() + + go s.refreshLimiterCacheLoop() +} + +// refreshLimiterCacheLoop runs the cache refresh loop in a background goroutine. 
+func (s *Manager) refreshLimiterCacheLoop() { + ticker := time.NewTicker(limiterCacheRefreshInterval) + defer ticker.Stop() + + // Do initial refresh immediately + s.refreshLimiterCache() + + for { + select { + case <-s.limiterCacheStop: + return + case <-ticker.C: + s.refreshLimiterCache() + } + } +} + +// refreshLimiterCache queries ClickHouse and updates the cache. +func (s *Manager) refreshLimiterCache() { + if s.network == "" { + return + } + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + maxBlock, err := s.queryLimiterMaxBlock(ctx, s.network) + if err != nil { + s.log.WithError(err).Warn("Failed to refresh limiter cache") + + return + } + + s.limiterCacheMu.Lock() + s.limiterCacheValue[s.network] = maxBlock + s.limiterCacheMu.Unlock() + + s.log.WithFields(logrus.Fields{ + "network": s.network, + "limiter_max": maxBlock.String(), + }).Debug("Refreshed limiter cache") +} + +// queryLimiterMaxBlock performs the actual ClickHouse query. +// Uses ORDER BY slot_start_date_time DESC LIMIT 1 to leverage the table's index. +func (s *Manager) queryLimiterMaxBlock(ctx context.Context, network string) (*big.Int, error) { + // Optimized query: uses ORDER BY index instead of MAX() which requires full table scan + query := fmt.Sprintf(` + SELECT toUInt64(ifNull(execution_payload_block_number, 0)) AS block_number + FROM %s + WHERE meta_network_name = '%s' + AND execution_payload_block_number IS NOT NULL + ORDER BY slot_start_date_time DESC + LIMIT 1 + `, s.limiterTable, network) + + s.log.WithFields(logrus.Fields{ + "network": network, + "table": s.limiterTable, + }).Debug("Querying for maximum execution payload block number") + + blockNumber, err := s.limiterClient.QueryUInt64(ctx, query, "block_number") + if err != nil { + return nil, fmt.Errorf("failed to query max execution payload block from %s: %w", s.limiterTable, err) + } + + if blockNumber == nil || *blockNumber == 0 { + return big.NewInt(0), nil + } + + return new(big.Int).SetUint64(*blockNumber), nil +} diff --git a/pkg/state/manager_test.go b/pkg/state/manager_test.go index a97e6f6..60bfa40 100644 --- a/pkg/state/manager_test.go +++ b/pkg/state/manager_test.go @@ -3,7 +3,9 @@ package state import ( "context" "math/big" + "sync" "testing" + "time" "github.com/ClickHouse/ch-go" "github.com/sirupsen/logrus" @@ -549,3 +551,203 @@ func TestGetNewestIncompleteBlock(t *testing.T) { }) } } + +func TestLimiterCache_Hit(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.DebugLevel) + + mockLimiter := new(MockClickHouseClient) + + manager := &Manager{ + log: log.WithField("test", "cache_hit"), + limiterClient: mockLimiter, + limiterTable: "test_limiter_table", + limiterEnabled: true, + limiterCacheValue: make(map[string]*big.Int, 1), + network: "mainnet", + } + + // Pre-populate cache + manager.limiterCacheValue["mainnet"] = big.NewInt(12345) + + ctx := context.Background() + result, err := manager.getLimiterMaxBlock(ctx, "mainnet") + + // Should return cached value without querying + assert.NoError(t, err) + assert.Equal(t, big.NewInt(12345), result) + + // Mock should NOT have been called - that's the point of the cache + mockLimiter.AssertNotCalled(t, "QueryUInt64", mock.Anything, mock.Anything, mock.Anything) +} + +func TestLimiterCache_Miss(t *testing.T) { + ctx := context.Background() + log := logrus.New() + log.SetLevel(logrus.DebugLevel) + + mockLimiter := new(MockClickHouseClient) + mockLimiter.On("QueryUInt64", ctx, mock.AnythingOfType("string"), 
"block_number").Return(uint64Ptr(54321), nil) + + manager := &Manager{ + log: log.WithField("test", "cache_miss"), + limiterClient: mockLimiter, + limiterTable: "test_limiter_table", + limiterEnabled: true, + limiterCacheValue: make(map[string]*big.Int, 1), + network: "mainnet", + } + + // Cache is empty, so should query + result, err := manager.getLimiterMaxBlock(ctx, "mainnet") + + assert.NoError(t, err) + assert.Equal(t, big.NewInt(54321), result) + + mockLimiter.AssertExpectations(t) +} + +func TestLimiterCache_RefreshUpdatesCache(t *testing.T) { + ctx := context.Background() + log := logrus.New() + log.SetLevel(logrus.DebugLevel) + + mockLimiter := new(MockClickHouseClient) + mockLimiter.On("QueryUInt64", mock.Anything, mock.AnythingOfType("string"), "block_number").Return(uint64Ptr(99999), nil) + + manager := &Manager{ + log: log.WithField("test", "refresh"), + limiterClient: mockLimiter, + limiterTable: "test_limiter_table", + limiterEnabled: true, + limiterCacheValue: make(map[string]*big.Int, 1), + network: "mainnet", + } + + // Refresh the cache + manager.refreshLimiterCache() + + // Wait a bit for the refresh to complete + time.Sleep(100 * time.Millisecond) + + // Now getLimiterMaxBlock should return cached value + result, err := manager.getLimiterMaxBlock(ctx, "mainnet") + + assert.NoError(t, err) + assert.Equal(t, big.NewInt(99999), result) + + // QueryUInt64 should only be called once (from refresh), not from getLimiterMaxBlock + mockLimiter.AssertNumberOfCalls(t, "QueryUInt64", 1) +} + +func TestLimiterCache_ConcurrentAccess(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.DebugLevel) + + mockLimiter := new(MockClickHouseClient) + + manager := &Manager{ + log: log.WithField("test", "concurrent"), + limiterClient: mockLimiter, + limiterTable: "test_limiter_table", + limiterEnabled: true, + limiterCacheValue: make(map[string]*big.Int, 1), + network: "mainnet", + } + + // Pre-populate cache + manager.limiterCacheValue["mainnet"] = big.NewInt(12345) + + // Run concurrent reads + var wg sync.WaitGroup + + for i := 0; i < 100; i++ { + wg.Add(1) + + go func() { + defer wg.Done() + + ctx := context.Background() + result, err := manager.getLimiterMaxBlock(ctx, "mainnet") + + assert.NoError(t, err) + assert.Equal(t, big.NewInt(12345), result) + }() + } + + wg.Wait() + + // No queries should have been made + mockLimiter.AssertNotCalled(t, "QueryUInt64", mock.Anything, mock.Anything, mock.Anything) +} + +func TestLimiterCache_SingleStart(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.DebugLevel) + + mockLimiter := new(MockClickHouseClient) + // Return a value for the initial refresh + mockLimiter.On("QueryUInt64", mock.Anything, mock.AnythingOfType("string"), "block_number").Return(uint64Ptr(12345), nil) + + manager := &Manager{ + log: log.WithField("test", "single_start"), + limiterClient: mockLimiter, + limiterTable: "test_limiter_table", + limiterEnabled: true, + network: "mainnet", + } + + // Start multiple times concurrently + var wg sync.WaitGroup + + for i := 0; i < 10; i++ { + wg.Add(1) + + go func() { + defer wg.Done() + + manager.startLimiterCacheRefresh() + }() + } + + wg.Wait() + + // Should only have started once + assert.True(t, manager.limiterCacheStarted) + assert.NotNil(t, manager.limiterCacheStop) + + // Clean up + manager.limiterCacheMu.Lock() + + if manager.limiterCacheStarted && manager.limiterCacheStop != nil { + close(manager.limiterCacheStop) + manager.limiterCacheStarted = false + } + + manager.limiterCacheMu.Unlock() + + // Give 
goroutine time to exit + time.Sleep(100 * time.Millisecond) +} + +func TestLimiterCache_NoNetworkSkipsRefresh(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.DebugLevel) + + mockLimiter := new(MockClickHouseClient) + + manager := &Manager{ + log: log.WithField("test", "no_network"), + limiterClient: mockLimiter, + limiterTable: "test_limiter_table", + limiterEnabled: true, + limiterCacheValue: make(map[string]*big.Int, 1), + network: "", // Empty network + } + + // Refresh should be a no-op + manager.refreshLimiterCache() + + // No queries should have been made + mockLimiter.AssertNotCalled(t, "QueryUInt64", mock.Anything, mock.Anything, mock.Anything) +}