diff --git a/cmd/flamegraph/flamegraph.go b/cmd/flamegraph/flamegraph.go index 802b5b3b..692ef8de 100644 --- a/cmd/flamegraph/flamegraph.go +++ b/cmd/flamegraph/flamegraph.go @@ -27,6 +27,7 @@ var examples = []string{ fmt.Sprintf(" Flamegraph from local host: $ %s %s", app.Name, cmdName), fmt.Sprintf(" Flamegraph from remote target: $ %s %s --target 192.168.1.1 --user fred --key fred_key", app.Name, cmdName), fmt.Sprintf(" Flamegraph from multiple targets: $ %s %s --targets targets.yaml", app.Name, cmdName), + fmt.Sprintf(" Flamegraph for cache misses: $ %s %s --perf-event cache-misses", app.Name, cmdName), } var Cmd = &cobra.Command{ @@ -48,6 +49,7 @@ var ( flagPids []int flagNoSystemSummary bool flagMaxDepth int + flagPerfEvent string ) const ( @@ -56,6 +58,7 @@ const ( flagPidsName = "pids" flagNoSystemSummaryName = "no-summary" flagMaxDepthName = "max-depth" + flagPerfEventName = "perf-event" ) func init() { @@ -66,6 +69,7 @@ func init() { Cmd.Flags().IntSliceVar(&flagPids, flagPidsName, nil, "") Cmd.Flags().BoolVar(&flagNoSystemSummary, flagNoSystemSummaryName, false, "") Cmd.Flags().IntVar(&flagMaxDepth, flagMaxDepthName, 0, "") + Cmd.Flags().StringVar(&flagPerfEvent, flagPerfEventName, "cycles:P", "") workflow.AddTargetFlags(Cmd) @@ -113,13 +117,17 @@ func getFlagGroups() []app.FlagGroup { Help: "comma separated list of PIDs. If not specified, all PIDs will be collected", }, { - Name: app.FlagFormatName, - Help: fmt.Sprintf("choose output format(s) from: %s", strings.Join(append([]string{report.FormatAll}, report.FormatHtml, report.FormatTxt, report.FormatJson), ", ")), + Name: flagPerfEventName, + Help: "perf event to use for native sampling (e.g., cpu-cycles, instructions, cache-misses, branches, context-switches, mem-loads, mem-stores, etc.)", }, { Name: flagMaxDepthName, Help: "maximum render depth of call stack in flamegraph (0 = no limit)", }, + { + Name: app.FlagFormatName, + Help: fmt.Sprintf("choose output format(s) from: %s", strings.Join(append([]string{report.FormatAll}, report.FormatHtml, report.FormatTxt, report.FormatJson), ", ")), + }, { Name: flagNoSystemSummaryName, Help: "do not include system summary table in report", @@ -183,7 +191,7 @@ func runCmd(cmd *cobra.Command, args []string) error { if !flagNoSystemSummary { tables = append(tables, app.TableDefinitions[app.SystemSummaryTableName]) } - tables = append(tables, tableDefinitions[CallStackFrequencyTableName]) + tables = append(tables, tableDefinitions[FlameGraphTableName]) reportingCommand := workflow.ReportingCommand{ Cmd: cmd, ReportNamePost: "flame", @@ -192,11 +200,12 @@ func runCmd(cmd *cobra.Command, args []string) error { "Duration": strconv.Itoa(flagDuration), "PIDs": strings.Join(util.IntSliceToStringSlice(flagPids), ","), "MaxDepth": strconv.Itoa(flagMaxDepth), + "PerfEvent": flagPerfEvent, }, Tables: tables, } - report.RegisterHTMLRenderer(CallStackFrequencyTableName, callStackFrequencyTableHTMLRenderer) + report.RegisterHTMLRenderer(FlameGraphTableName, callStackFrequencyTableHTMLRenderer) return reportingCommand.Run() } diff --git a/cmd/flamegraph/flamegraph_renderers.go b/cmd/flamegraph/flamegraph_renderers.go index ac4d4b15..5cd2c7fb 100644 --- a/cmd/flamegraph/flamegraph_renderers.go +++ b/cmd/flamegraph/flamegraph_renderers.go @@ -185,7 +185,7 @@ func renderFlameGraph(header string, tableValues table.TableValues, field string fg := texttemplate.Must(texttemplate.New("flameGraphTemplate").Parse(flameGraphTemplate)) buf := new(bytes.Buffer) err = fg.Execute(buf, flameGraphTemplateStruct{ - ID: fmt.Sprintf("%d%s", util.RandUint(10000), header), + ID: fmt.Sprintf("%d%s", util.RandUint(10000), strings.Split(header, " ")[0]), Data: jsonStacks, Header: header, }) @@ -223,7 +223,14 @@ func callStackFrequencyTableHTMLRenderer(tableValues table.TableValues, targetNa } ` - out += renderFlameGraph("Native", tableValues, "Native Stacks") - out += renderFlameGraph("Java", tableValues, "Java Stacks") + // get the perf event from the table values + perfEventFieldIndex, err := table.GetFieldIndex("Perf Event", tableValues) + if err != nil { + slog.Error("didn't find expected field (Perf Event) in table", slog.String("error", err.Error())) + return out + } + perfEvent := tableValues.Fields[perfEventFieldIndex].Values[0] + out += renderFlameGraph(fmt.Sprintf("Native (%s)", perfEvent), tableValues, "Native Stacks") + out += renderFlameGraph("Java (async-profiler)", tableValues, "Java Stacks") return out } diff --git a/cmd/flamegraph/flamegraph_tables.go b/cmd/flamegraph/flamegraph_tables.go index 5fb655c0..1983614e 100644 --- a/cmd/flamegraph/flamegraph_tables.go +++ b/cmd/flamegraph/flamegraph_tables.go @@ -18,35 +18,36 @@ import ( // flamegraph table names const ( - CallStackFrequencyTableName = "Call Stack Frequency" + FlameGraphTableName = "Flamegraph" ) // flamegraph tables var tableDefinitions = map[string]table.TableDefinition{ - CallStackFrequencyTableName: { - Name: CallStackFrequencyTableName, - MenuLabel: CallStackFrequencyTableName, + FlameGraphTableName: { + Name: FlameGraphTableName, + MenuLabel: FlameGraphTableName, ScriptNames: []string{ - script.CollapsedCallStacksScriptName, + script.FlameGraphScriptName, }, - FieldsFunc: callStackFrequencyTableValues}, + FieldsFunc: flameGraphTableValues}, } -func callStackFrequencyTableValues(outputs map[string]script.ScriptOutput) []table.Field { +func flameGraphTableValues(outputs map[string]script.ScriptOutput) []table.Field { fields := []table.Field{ {Name: "Native Stacks", Values: []string{nativeFoldedFromOutput(outputs)}}, {Name: "Java Stacks", Values: []string{javaFoldedFromOutput(outputs)}}, {Name: "Maximum Render Depth", Values: []string{maxRenderDepthFromOutput(outputs)}}, + {Name: "Perf Event", Values: []string{perfEventFromOutput(outputs)}}, } return fields } func javaFoldedFromOutput(outputs map[string]script.ScriptOutput) string { - if outputs[script.CollapsedCallStacksScriptName].Stdout == "" { + if outputs[script.FlameGraphScriptName].Stdout == "" { slog.Warn("collapsed call stack output is empty") return "" } - sections := extract.GetSectionsFromOutput(outputs[script.CollapsedCallStacksScriptName].Stdout) + sections := extract.GetSectionsFromOutput(outputs[script.FlameGraphScriptName].Stdout) if len(sections) == 0 { slog.Warn("no sections in collapsed call stack output") return "" @@ -84,11 +85,11 @@ func javaFoldedFromOutput(outputs map[string]script.ScriptOutput) string { } func nativeFoldedFromOutput(outputs map[string]script.ScriptOutput) string { - if outputs[script.CollapsedCallStacksScriptName].Stdout == "" { + if outputs[script.FlameGraphScriptName].Stdout == "" { slog.Warn("collapsed call stack output is empty") return "" } - sections := extract.GetSectionsFromOutput(outputs[script.CollapsedCallStacksScriptName].Stdout) + sections := extract.GetSectionsFromOutput(outputs[script.FlameGraphScriptName].Stdout) if len(sections) == 0 { slog.Warn("no sections in collapsed call stack output") return "" @@ -103,6 +104,11 @@ func nativeFoldedFromOutput(outputs map[string]script.ScriptOutput) string { } } if dwarfFolded == "" && fpFolded == "" { + slog.Warn("no native folded stacks found") + // "event syntax error: 'foo'" indicates that the perf event specified is invalid/unsupported + if strings.Contains(outputs[script.FlameGraphScriptName].Stderr, "event syntax error") { + slog.Error("unsupported perf event specified", slog.String("error", outputs[script.FlameGraphScriptName].Stderr)) + } return "" } folded, err := mergeSystemFolded(fpFolded, dwarfFolded) @@ -113,11 +119,11 @@ func nativeFoldedFromOutput(outputs map[string]script.ScriptOutput) string { } func maxRenderDepthFromOutput(outputs map[string]script.ScriptOutput) string { - if outputs[script.CollapsedCallStacksScriptName].Stdout == "" { + if outputs[script.FlameGraphScriptName].Stdout == "" { slog.Warn("collapsed call stack output is empty") return "" } - sections := extract.GetSectionsFromOutput(outputs[script.CollapsedCallStacksScriptName].Stdout) + sections := extract.GetSectionsFromOutput(outputs[script.FlameGraphScriptName].Stdout) if len(sections) == 0 { slog.Warn("no sections in collapsed call stack output") return "" @@ -130,6 +136,24 @@ func maxRenderDepthFromOutput(outputs map[string]script.ScriptOutput) string { return "" } +func perfEventFromOutput(outputs map[string]script.ScriptOutput) string { + if outputs[script.FlameGraphScriptName].Stdout == "" { + slog.Warn("collapsed call stack output is empty") + return "" + } + sections := extract.GetSectionsFromOutput(outputs[script.FlameGraphScriptName].Stdout) + if len(sections) == 0 { + slog.Warn("no sections in collapsed call stack output") + return "" + } + for header, content := range sections { + if header == "perf_event" { + return strings.TrimSpace(content) + } + } + return "" +} + // ProcessStacks ... // [processName][callStack]=count type ProcessStacks map[string]Stacks diff --git a/internal/script/scripts.go b/internal/script/scripts.go index ab42faa9..af856e87 100644 --- a/internal/script/scripts.go +++ b/internal/script/scripts.go @@ -120,7 +120,7 @@ const ( GaudiTelemetryScriptName = "gaudi telemetry" PDUTelemetryScriptName = "pdu telemetry" // flamegraph scripts - CollapsedCallStacksScriptName = "collapsed call stacks" + FlameGraphScriptName = "flamegraph" // lock scripts ProfileKernelLockScriptName = "profile kernel lock" ) @@ -1275,7 +1275,7 @@ if [ $duration -ne 0 ] && [ $interval -ne 0 ]; then fi LC_TIME=C mpstat -u -T -I SCPU -P ALL $interval $count `, - Superuser: true, + Superuser: false, Lkms: []string{}, Depends: []string{"mpstat"}, }, @@ -1288,7 +1288,7 @@ if [ $duration -ne 0 ] && [ $interval -ne 0 ]; then fi S_TIME_FORMAT=ISO iostat -d -t $interval $count | sed '/^loop/d' `, - Superuser: true, + Superuser: false, Lkms: []string{}, Depends: []string{"iostat"}, }, @@ -1301,7 +1301,7 @@ if [ $duration -ne 0 ] && [ $interval -ne 0 ]; then fi LC_TIME=C sar -r $interval $count `, - Superuser: true, + Superuser: false, Lkms: []string{}, Depends: []string{"sar", "sadc"}, }, @@ -1314,7 +1314,7 @@ if [ $duration -ne 0 ] && [ $interval -ne 0 ]; then fi LC_TIME=C sar -n DEV $interval $count `, - Superuser: true, + Superuser: false, Lkms: []string{}, Depends: []string{"sar", "sadc"}, }, @@ -1445,13 +1445,14 @@ done Superuser: false, }, // flamegraph scripts - CollapsedCallStacksScriptName: { - Name: CollapsedCallStacksScriptName, + FlameGraphScriptName: { + Name: FlameGraphScriptName, ScriptTemplate: `# Combined (perf record and async profiler) call stack collection pids={{.PIDs}} duration={{.Duration}} frequency={{.Frequency}} maxdepth={{.MaxDepth}} +perf_event={{.PerfEvent}} ap_interval=0 if [ "$frequency" -ne 0 ]; then @@ -1497,6 +1498,9 @@ print_results() { echo "########## maximum depth ##########" echo "$maxdepth" + echo "########## perf_event ##########" + echo "$perf_event" + if [ -f perf_dwarf_folded ]; then echo "########## perf_dwarf ##########" cat perf_dwarf_folded @@ -1559,9 +1563,9 @@ fi # Start profiling with perf in frame pointer mode if [ -n "$pids" ]; then - perf record -F "$frequency" -p "$pids" -g -o perf_fp_data -m 129 & + perf record -e "$perf_event" -F "$frequency" -p "$pids" -g -o perf_fp_data -m 129 & else - perf record -F "$frequency" -a -g -o perf_fp_data -m 129 & + perf record -e "$perf_event" -F "$frequency" -a -g -o perf_fp_data -m 129 & fi perf_fp_pid=$! if ! kill -0 $perf_fp_pid 2>/dev/null; then @@ -1572,9 +1576,9 @@ fi # Start profiling with perf in dwarf mode if [ -n "$pids" ]; then - perf record -F "$frequency" -p "$pids" -g -o perf_dwarf_data -m 257 --call-graph dwarf,8192 & + perf record -e "$perf_event" -F "$frequency" -p "$pids" -g -o perf_dwarf_data -m 257 --call-graph dwarf,8192 & else - perf record -F "$frequency" -a -g -o perf_dwarf_data -m 257 --call-graph dwarf,8192 & + perf record -e "$perf_event" -F "$frequency" -a -g -o perf_dwarf_data -m 257 --call-graph dwarf,8192 & fi perf_dwarf_pid=$! if ! kill -0 $perf_dwarf_pid 2>/dev/null; then diff --git a/tools/stackcollapse-perf/stackcollapse-perf.go b/tools/stackcollapse-perf/stackcollapse-perf.go index 74b1094e..f8cc9508 100644 --- a/tools/stackcollapse-perf/stackcollapse-perf.go +++ b/tools/stackcollapse-perf/stackcollapse-perf.go @@ -114,7 +114,7 @@ func main() { input = os.Stdin } - err = ProcessStacks(input, os.Stdout, config) + err = ProcessStacks(input, os.Stdout, os.Stderr, config) if err != nil { fmt.Fprintf(os.Stderr, "Error processing stacks: %s\n", err) os.Exit(1) @@ -133,12 +133,14 @@ var ( // ProcessStacks processes stack traces from the input reader and writes the collapsed stacks to the output writer. // It uses the provided configuration to control the processing behavior. -func ProcessStacks(input io.Reader, output io.Writer, config Config) error { +func ProcessStacks(input io.Reader, output io.Writer, errorOutput io.Writer, config Config) error { var stack []string var processName string var period int aggregator := NewStackAggregator() scanner := bufio.NewScanner(input) + eventFilter := config.EventFilter // if not set, it will be set to the first event encountered + skipStackLines := false // whether to skip stack lines based on event filtering // main loop, read lines from stdin for scanner.Scan() { @@ -165,25 +167,35 @@ func ProcessStacks(input io.Reader, output io.Writer, config Config) error { } // check for event record if eventLineRegex.MatchString(line) { + skipStackLines = false var err error - processName, period, err = handleEventRecord(line, config) + var event string + processName, period, event, err = handleEventRecord(line, config) if err != nil { - fmt.Fprintf(output, "Error: %s\n", err) + fmt.Fprintf(errorOutput, "Error: %s\n", err) + skipStackLines = true + continue + } + if eventFilter == "" { + eventFilter = event // default to first event + } else if event != eventFilter { + fmt.Fprintf(errorOutput, "Skipping event %s, filtering for %s\n", event, eventFilter) + skipStackLines = true // need to skip stack lines for this event } continue } // check for stack line - if stackLineRegex.MatchString(line) { + if stackLineRegex.MatchString(line) && !skipStackLines { err := handleStackLine(line, &stack, processName, config) if err != nil { - fmt.Fprintf(output, "Error: %s\n", err) + fmt.Fprintf(errorOutput, "Error: %s\n", err) } continue } } // Check for errors during scanning if err := scanner.Err(); err != nil { - fmt.Fprintf(os.Stderr, "Error reading input: %s\n", err) + fmt.Fprintf(errorOutput, "Error reading input: %s\n", err) return err } // Output results @@ -199,7 +211,7 @@ func ProcessStacks(input io.Reader, output io.Writer, config Config) error { } // handleEventRecord parses an event record line and updates the process name and period based on the configuration. -func handleEventRecord(line string, config Config) (processName string, period int, err error) { +func handleEventRecord(line string, config Config) (processName string, period int, event string, err error) { matches := eventLineRegex.FindStringSubmatch(line) if matches == nil { return @@ -224,14 +236,7 @@ func handleEventRecord(line string, config Config) (processName string, period i } period = eventPeriodInt } - event := eventMatches[2] - - if config.EventFilter == "" { - config.EventFilter = event - } else if event != config.EventFilter { - err = fmt.Errorf("event type mismatch: %s != %s", event, config.EventFilter) - return - } + event = eventMatches[2] } if config.IncludeTid { diff --git a/tools/stackcollapse-perf/stackcollapse-perf_test.go b/tools/stackcollapse-perf/stackcollapse-perf_test.go index 053d029b..46b09039 100644 --- a/tools/stackcollapse-perf/stackcollapse-perf_test.go +++ b/tools/stackcollapse-perf/stackcollapse-perf_test.go @@ -50,6 +50,7 @@ stress-ng-cpu 1230793 [098] 6223127.074783: 307465331 cycles:P: `) output := &bytes.Buffer{} + errorOutput := &bytes.Buffer{} config := Config{ IncludePname: true, @@ -60,7 +61,7 @@ stress-ng-cpu 1230793 [098] 6223127.074783: 307465331 cycles:P: TidyGeneric: true, } - err := ProcessStacks(input, output, config) + err := ProcessStacks(input, output, errorOutput, config) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -69,6 +70,10 @@ stress-ng-cpu 1230793 [098] 6223127.074783: 307465331 cycles:P: if output.String() != expected { t.Errorf("expected %q, got %q", expected, output.String()) } + + if errorOutput.Len() != 0 { + t.Errorf("expected no error output, got %q", errorOutput.String()) + } } func TestHandleEventRecord(t *testing.T) { @@ -84,7 +89,7 @@ func TestHandleEventRecord(t *testing.T) { TidyGeneric: true, } - processName, period, err := handleEventRecord(line, config) + processName, period, event, err := handleEventRecord(line, config) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -97,6 +102,10 @@ func TestHandleEventRecord(t *testing.T) { if period != expectedPeriod { t.Errorf("expected period to be %d, got %d", expectedPeriod, period) } + expectedEvent := "cycles:P" + if event != expectedEvent { + t.Errorf("expected event to be '%s', got %q", expectedEvent, event) + } } func TestHandleStackLine(t *testing.T) {