diff --git a/.claude/skills/cicd-diagnostics/BEST_PRACTICES_ASSESSMENT.md b/.claude/skills/cicd-diagnostics/BEST_PRACTICES_ASSESSMENT.md new file mode 100644 index 000000000000..22aab133e0b4 --- /dev/null +++ b/.claude/skills/cicd-diagnostics/BEST_PRACTICES_ASSESSMENT.md @@ -0,0 +1,102 @@ +# Skill Best Practices Assessment + +## ✅ Best Practices Compliance + +### Required Metadata (All Present) +- ✅ **name**: `cicd-diagnostics` (15 chars, under 64 limit) +- ✅ **description**: 199 characters (under 200 limit) - concise and specific +- ✅ **version**: `2.0.0` (tracking versions) +- ✅ **dependencies**: `python>=3.8` (clearly specified) + +### Best Practice Guidelines + +#### ✅ Focused on One Workflow +The skill is focused on CI/CD failure diagnosis - a single, well-defined task. + +#### ✅ Clear Instructions +The skill provides comprehensive instructions for: +- When to use the skill (extensive trigger list) +- How to use the skill (step-by-step workflow) +- What utilities are available +- Examples throughout + +#### ✅ Examples Included +The skill includes: +- Code examples for Python utilities +- Example prompts that trigger the skill +- Example analysis outputs +- Example diagnostic reports + +#### ✅ Defines When to Use +Extensive "When to Use This Skill" section with: +- Primary triggers (always use) +- Context indicators (use when mentioned) +- Don't use scenarios (when NOT to use) + +### ⚠️ Areas for Improvement + +#### 1. File Length +- **Current**: 1,130 lines +- **Best Practice**: Keep concise (<500 lines recommended) +- **Issue**: SKILL.md is very comprehensive but verbose +- **Recommendation**: Consider moving detailed sections to reference files (REFERENCE.md) + +#### 2. Duplicate Files +- **Issue**: Both `Skill.md` and `SKILL.md` exist (appear identical) +- **Recommendation**: Use only `SKILL.md` (uppercase) per Claude conventions + +#### 3. Structure Alignment +- **Current**: Single large SKILL.md with all content +- **Best Practice**: Use progressive disclosure with reference files +- **Recommendation**: Move detailed technical content to REFERENCE.md + +### Comparison with Example Skills + +#### Similarities to Examples: +- ✅ YAML frontmatter with required fields +- ✅ Clear description under 200 chars +- ✅ Version tracking +- ✅ Dependencies specified +- ✅ Python scripts for utilities +- ✅ Clear when-to-use guidance + +#### Differences from Examples: +- ⚠️ Much longer than typical examples (examples are usually 200-500 lines) +- ⚠️ More comprehensive/verbose than typical +- ⚠️ Could benefit from progressive disclosure (main SKILL.md + REFERENCE.md) + +### Recommendations + +1. **Keep SKILL.md focused on core workflow** (<500 lines) + - Move detailed technical content to REFERENCE.md + - Keep examples concise + - Focus on "how to use" not "everything about" + +2. **Remove duplicate file** + - Keep only `SKILL.md` (uppercase) + - Delete `Skill.md` if identical + +3. 
**Maintain current strengths** + - Excellent description (199 chars, specific) + - Clear Python implementation + - Good examples + - Well-defined triggers + +### Overall Assessment + +**Score: 8/10** + +**Strengths:** +- ✅ Excellent metadata (all required fields, proper length) +- ✅ Clear Python implementation (best practice) +- ✅ Comprehensive examples +- ✅ Well-defined use cases +- ✅ Version tracking + +**Areas for Improvement:** +- ⚠️ File length (too verbose for SKILL.md) +- ⚠️ Consider progressive disclosure structure +- ⚠️ Remove duplicate file + +**Conclusion:** The skill follows most best practices well, especially the critical ones (description length, Python implementation, clear triggers). The main improvement would be to make SKILL.md more concise by moving detailed content to reference files, following the progressive disclosure pattern recommended in best practices. + diff --git a/.claude/skills/cicd-diagnostics/BEST_PRACTICES_COMPLIANCE.md b/.claude/skills/cicd-diagnostics/BEST_PRACTICES_COMPLIANCE.md new file mode 100644 index 000000000000..5f8f15565d0d --- /dev/null +++ b/.claude/skills/cicd-diagnostics/BEST_PRACTICES_COMPLIANCE.md @@ -0,0 +1,130 @@ +# Best Practices Compliance Assessment + +Based on: https://docs.claude.com/en/docs/agents-and-tools/agent-skills/best-practices + +## ✅ Fully Compliant + +### 1. Naming Conventions +- ✅ **SKILL.md** (uppercase) - Correct convention +- ✅ **name**: `cicd-diagnostics` (lowercase, hyphens, under 64 chars) +- ✅ **File naming**: Descriptive names (workspace.py, github_api.py, evidence.py) + +### 2. YAML Frontmatter +- ✅ **name**: Present, valid format (lowercase, hyphens) +- ✅ **description**: Present, 199 chars (under 1024 limit) +- ✅ **version**: Present (2.0.0) - optional but good practice +- ✅ **dependencies**: Present (python>=3.8) - optional but good practice + +### 3. Description Quality +- ✅ Describes what the skill does +- ✅ Describes when to use it +- ✅ Includes key terms (CI/CD, GitHub Actions, DotCMS, failures, tests) +- ✅ Concise and specific + +### 4. File Structure +- ✅ Uses forward slashes (no Windows paths) +- ✅ Descriptive file names +- ✅ Organized directory structure (utils/ subdirectory) +- ✅ Reference files exist (WORKFLOWS.md, LOG_ANALYSIS.md, etc.) + +### 5. Code and Scripts +- ✅ Python scripts solve problems (don't punt to Claude) +- ✅ Clear documentation in scripts +- ✅ No Windows-style paths +- ✅ Dependencies clearly listed + +## ⚠️ Areas Needing Improvement + +### 1. SKILL.md Length (CRITICAL) +- **Current**: 1,042 lines +- **Best Practice**: Under 500 lines for optimal performance +- **Issue**: SKILL.md is too verbose - exceeds recommended length by 2x +- **Impact**: Higher token usage, slower loading, harder for Claude to navigate + +**Recommendation**: Apply progressive disclosure pattern: +- Keep core workflow in SKILL.md (<500 lines) +- Move detailed technical content to REFERENCE.md +- Move extensive examples to EXAMPLES.md +- Keep "When to Use" section but make it more concise + +### 2. Progressive Disclosure +- **Current**: Some reference files exist but SKILL.md still contains too much detail +- **Best Practice**: SKILL.md should be high-level guide pointing to reference files +- **Recommendation**: Refactor to follow Pattern 1 (High-level guide with references) + +### 3. 
Concise Content +- **Current**: Some sections explain things Claude already knows +- **Best Practice**: "Default assumption: Claude is already very smart" +- **Recommendation**: Remove explanations of basic concepts (what GitHub Actions is, what Python is, etc.) + +## 📋 Detailed Checklist + +### Core Quality +- ✅ Description is specific and includes key terms +- ✅ Description includes both what and when to use +- ❌ SKILL.md body is under 500 lines (currently 1,042) +- ⚠️ Additional details are in separate files (partially - need more) +- ✅ No time-sensitive information +- ✅ Consistent terminology throughout +- ✅ Examples are concrete, not abstract +- ✅ File references are one level deep +- ⚠️ Progressive disclosure used appropriately (needs improvement) +- ✅ Workflows have clear steps + +### Code and Scripts +- ✅ Scripts solve problems rather than punt to Claude +- ✅ Error handling is explicit and helpful +- ✅ No "voodoo constants" (all values justified) +- ✅ Required packages listed in instructions +- ✅ Scripts have clear documentation +- ✅ No Windows-style paths (all forward slashes) +- ✅ Validation/verification steps for critical operations +- ✅ Feedback loops included for quality-critical tasks + +### Structure Alignment +- ✅ YAML frontmatter correct +- ✅ File naming follows conventions +- ⚠️ SKILL.md should be more concise (progressive disclosure) +- ✅ Reference files exist +- ✅ Utils directory organized + +## Recommendations + +### High Priority +1. **Refactor SKILL.md to <500 lines** + - Move detailed technical expertise to `REFERENCE.md` + - Move extensive examples to `EXAMPLES.md` + - Keep only core workflow and essential instructions in SKILL.md + - Use progressive disclosure pattern + +2. **Apply "Concise is Key" principle** + - Remove explanations Claude already knows + - Challenge each paragraph: "Does Claude really need this?" + - Assume Claude knows GitHub Actions, Python, CI/CD basics + +### Medium Priority +3. **Enhance progressive disclosure** + - SKILL.md should be a high-level guide + - Reference files should contain detailed content + - Clear navigation between files + +4. **Optimize description** (optional) + - Current description is good (199 chars) + - Could potentially expand to include more key terms if needed + - But current length is fine + +## Overall Score: 7.5/10 + +**Strengths:** +- ✅ Excellent naming and structure +- ✅ Good description +- ✅ Proper Python implementation +- ✅ Clear file organization +- ✅ No Windows paths or anti-patterns + +**Critical Issue:** +- ❌ SKILL.md is 1,042 lines (should be <500) + +**Conclusion:** The skill follows most best practices well, but needs refactoring to reduce SKILL.md length using progressive disclosure. This is the most important improvement needed to align with best practices. 
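+
+### Quick Compliance Check
+
+For spot-checking the two measurable thresholds above, a minimal sketch (it assumes SKILL.md at the path documented in this assessment; the naive frontmatter parse is illustrative only, not part of the skill):
+
+```python
+#!/usr/bin/env python3
+"""Spot-check SKILL.md against the measurable thresholds in this checklist."""
+import re
+from pathlib import Path
+
+text = Path(".claude/skills/cicd-diagnostics/SKILL.md").read_text(encoding="utf-8")
+
+# Best practice: keep the SKILL.md body under 500 lines
+line_count = len(text.splitlines())
+print(f"SKILL.md lines: {line_count} (target: <500)")
+
+# Frontmatter description must stay under the 1024-char limit
+match = re.search(r"^description:\s*(.+)$", text, flags=re.MULTILINE)
+if match:
+    desc = match.group(1).strip()
+    print(f"description: {len(desc)} chars (limit: 1024)")
+```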
+ + diff --git a/.claude/skills/cicd-diagnostics/CHANGELOG.md b/.claude/skills/cicd-diagnostics/CHANGELOG.md new file mode 100644 index 000000000000..4e2135f6a11f --- /dev/null +++ b/.claude/skills/cicd-diagnostics/CHANGELOG.md @@ -0,0 +1,929 @@ +# CI/CD Diagnostics Skill - Changelog + +## Version 2.3.2 - 2025-12-09 (Removed Redundant API Call) + +### Performance Improvement: Skip GitHub API Entirely + +#### Problem Discovered +After implementing the two-tier approach (API + HTML scraping), testing revealed that the GitHub API provides **zero unique value** for workflow syntax error diagnosis: + +**API returned:** +- 4 notices: ARTIFACTORY config (informational only, no diagnostic value) +- 1 failure: Generic "Process completed with exit code 1" (no root cause) +- **Missing:** The actual workflow syntax errors + +**HTML scraper returned:** +- Same 4 notices (with better context - includes job titles) +- Same generic failure +- **PLUS the critical workflow syntax errors at lines 132-136** + +The API was adding latency (~1-2 seconds) with no benefit. + +#### Solution: HTML Scraping Only + +Removed the GitHub API call entirely from `fetch-annotations.py`. Now: +- ✅ Faster execution (single operation instead of two) +- ✅ Clearer output (no confusing "tier 1 vs tier 2") +- ✅ Better context (HTML includes job titles) +- ✅ Same complete data (HTML captures everything API had, plus more) + +#### Changes Made + +**Updated `fetch-annotations.py`:** +- Removed `from github_api import get_workflow_run_annotations` +- Removed all API-related code (lines 47-93) +- Simplified output to show only HTML scraped annotations +- Updated docstring to clarify API doesn't work +- Added clear note explaining why we skip the API + +**Before:** +``` +STEP 1: Fetching job-level annotations from GitHub API +[API call with redundant data] + +STEP 2: Scraping workflow-level annotations from HTML +[HTML scraping with critical errors] + +SUMMARY: Total 11 annotations (5 from API, 6 from HTML) +``` + +**After:** +``` +Fetching workflow annotations from GitHub UI (HTML) +ℹ️ Note: GitHub API does NOT expose workflow syntax validation errors +[HTML scraping with all 6 annotations including critical errors] + +SUMMARY: Total 6 annotations +``` + +#### Impact + +**Performance:** +- ~1-2 seconds faster (eliminated API call + JSON processing) +- Single HTTP request instead of multiple API calls + +**User Experience:** +- Clearer output (no confusing two-source presentation) +- Direct focus on the critical information +- Better context with job titles in annotations + +**Maintenance:** +- Simpler codebase (removed unused API integration code) +- Less fragile (one source to maintain, not two) +- Clearer intent (code explicitly states API doesn't work) + +#### Testing + +**Test case: Run 20043196360** +```bash +$ python3 fetch-annotations.py 20043196360 $WORKSPACE + +================================================================================ +Fetching workflow annotations from GitHub UI (HTML) +================================================================================ +ℹ️ Note: GitHub API does NOT expose workflow syntax validation errors + We scrape the HTML directly to find these critical errors + +📊 Found 6 annotation(s): + • Failure: 2 + • Notice: 4 + +# Critical workflow syntax errors successfully captured: + -6 Release Process + .github/workflows/cicd_6-release.yml (Line: 132, Col: 24): Unexpected value 'true' + ... 
+```
+
+✅ All annotations captured
+✅ Faster execution
+✅ Clearer output
+
+#### Files Modified
+
+**Modified:**
+- `fetch-annotations.py` - Removed API code, simplified to HTML-only
+
+**Note:** `utils/github_api.py` still exists for potential future use if GitHub fixes the API limitation, but is no longer imported or used by fetch-annotations.py.
+
+#### Backward Compatibility
+
+✅ Output file format unchanged (`workflow-annotations-scraped.json`)
+✅ No breaking changes to data structure
+✅ Script signature unchanged (same parameters)
+⚠️ `annotations.json` (API output) no longer created - not needed
+
+---
+
+## Version 2.3.1 - 2025-12-09 (HTML Scraping for Workflow Syntax Errors)
+
+### Critical Enhancement: HTML Scraping for Invisible Errors
+
+#### Problem Discovered
+Version 2.3.0 added workflow annotation detection via GitHub API, but testing revealed a **critical limitation**: workflow-level syntax validation errors are **NOT accessible via any official GitHub API** (REST or GraphQL). These errors are:
+- ✅ Visible in the GitHub Actions UI as annotations
+- ❌ Not returned by the Check Runs API (`GET /repos/{owner}/{repo}/check-runs/{check_run_id}/annotations`)
+- ❌ Not accessible via GraphQL API (which doesn't support Actions workflows at all)
+- 🔴 **Critical for diagnosis** because they prevent jobs from executing entirely
+
+**Reference Issue:** Run 20043196360 had workflow syntax errors at lines 132-136 in `.github/workflows/cicd_6-release.yml` that prevented the release job from executing. These errors were invisible to the API-based annotation fetching.
+
+#### Solution: HTML Scraping Workaround
+
+Added HTML scraping capability to extract workflow annotations directly from the GitHub Actions UI when the API fails to provide them.
+
+**⚠️ IMPORTANT CAVEAT:** This is a workaround for a known GitHub API limitation and may break if GitHub changes their HTML structure. Last tested: 2025-12-09
+
+#### Major Changes
+
+##### 1. HTML Scraper Module (NEW)
+
+**New file: `utils/html_scraper.py`**
+- `scrape_workflow_annotations()` - Fetch and parse HTML from GitHub Actions page
+- `parse_annotations_from_html()` - Extract annotations from HTML structure
+- `save_scraped_annotations()` - Save with metadata and warnings
+- `format_scraped_annotations_report()` - Generate human-readable report
+
+**HTML Structure Parsed:**
+```html
+<annotation-message>
+  <span>-6 Release Process</span>
+  <svg class="octicon octicon-x-circle">…</svg>
+  <div data-target="annotation-message.annotationContainer">
+    <div>
+      .github/workflows/cicd_6-release.yml (Line: 132, Col: 24): Unexpected value 'true'
+      .github/workflows/cicd_6-release.yml (Line: 133, Col: 24): Unexpected value 'true'
+    </div>
+  </div>
+</annotation-message>
+```
+
+**Extraction Logic:**
+1. Find `<annotation-message>` blocks using regex
+2. Extract title from the `<span>` tag
+3. Determine severity from SVG icon class (`octicon-x-circle` = failure, `octicon-alert` = warning, `octicon-info` = notice)
+4. Extract full message text from inner `div
` within `data-target="annotation-message.annotationContainer"` +5. Preserve complete annotation messages without line-by-line parsing (more robust to HTML changes) + +**Key Design Decisions:** +- ✅ Extract full annotation blocks (not individual lines) for robustness +- ✅ Use `curl` directly instead of `gh api` for HTML content +- ✅ Skip empty or very short annotations (< 10 chars) +- ✅ Avoid duplicates by checking message uniqueness +- ✅ Add clear warnings about HTML scraping fragility + +##### 2. Integrated Two-Tier Approach + +**Updated `fetch-annotations.py`:** +- **Tier 1:** Try GitHub API first (fast, official, but incomplete) +- **Tier 2:** Fall back to HTML scraping (slower, fragile, but catches syntax errors) +- Combined reporting shows both sources +- Saves scraped data to `workflow-annotations-scraped.json` + +**Output Structure:** +```json +{ + "workflow_annotations": [ + { + "level": "failure", + "title": "-6 Release Process", + "message": ".github/workflows/cicd_6-release.yml (Line: 132, Col: 24): Unexpected value 'true'\n..." + } + ], + "source": "html_scrape", + "warning": "This data was scraped from HTML and may become invalid if GitHub changes their UI structure", + "url": "https://github.com/dotCMS/core/actions/runs/20043196360", + "run_id": "20043196360", + "scrape_timestamp": "2025-12-09T13:24:48+00:00" +} +``` + +##### 3. Documentation Added + +**New file: `.claude/diagnostics/run-20043196360/ANNOTATION_API_RESEARCH.md`** +- Documents GitHub API limitations discovered +- References GitHub Community Discussion #57536 about `startup_failure` errors +- Explains why HTML scraping is necessary +- Lists all API endpoints tested (all failed to return workflow syntax errors) + +#### Testing & Validation + +**Test Case: Run 20043196360** +- ✅ GitHub API returned 0 annotations +- ✅ HTML scraper found 6 annotations (2 failures, 4 notices) +- ✅ Successfully extracted workflow syntax errors: + ``` + Title: -6 Release Process + Message: .github/workflows/cicd_6-release.yml (Line: 132, Col: 24): Unexpected value 'true' + .github/workflows/cicd_6-release.yml (Line: 133, Col: 24): Unexpected value 'true' + .github/workflows/cicd_6-release.yml (Line: 134, Col: 23): Unexpected value 'true' + .github/workflows/cicd_6-release.yml (Line: 135, Col: 29): Unexpected value 'true' + ``` +- ✅ Full message text preserved (no truncation) +- ✅ Correct severity classification (failure vs notice) + +#### Bug Fixes from Testing + +1. **ImportError for `validate_workspace`** + - **Cause:** Function doesn't exist in `workspace.py` + - **Fix:** Replaced with inline validation in `fetch-annotations.py` + +2. **Truncated annotation messages (initial implementation)** + - **Cause:** Complex regex trying to parse individual lines + - **Fix:** Simplified to extract full annotation blocks from inner div + +3. 
**HTML fetching with `gh api`** + - **Cause:** `gh api` doesn't handle HTML responses correctly + - **Fix:** Changed to direct `curl` command + +#### API Research Summary + +**APIs Tested (All Failed):** +- ❌ `GET /repos/{owner}/{repo}/actions/runs/{run_id}/jobs` - No annotations field +- ❌ `GET /repos/{owner}/{repo}/check-suites/{suite_id}` - Summary only, no details +- ❌ `GET /repos/{owner}/{repo}/check-runs/{run_id}/annotations` - Returns job-level only +- ❌ GraphQL API - Does not support GitHub Actions workflows +- ❌ GitHub REST API v3 - No workflow annotation endpoints + +**Community Evidence:** +- GitHub Community Discussion #57536 confirms `startup_failure` errors are not exposed via API +- Multiple developers report the same limitation since 2020 +- No official GitHub API support planned (as of December 2025) + +#### When HTML Scraping is Used + +**ALWAYS use HTML scraping when:** +- API returns no annotations but GitHub UI shows them +- Jobs marked "skipped" without obvious conditional logic +- Workflow syntax validation errors suspected +- "Process completed with exit code 1" without other error messages + +**Skip HTML scraping when:** +- API successfully returns annotations with workflow syntax errors +- Job logs contain clear error messages +- Failure is clearly from test/build errors (not workflow syntax) + +#### Maintenance & Fragility Warnings + +**⚠️ HTML scraping is fragile and may break when:** +- GitHub redesigns their Actions UI +- HTML class names or structure changes +- `` custom element is replaced +- SVG icon classes are renamed + +**When scraper breaks:** +1. Update regex patterns in `parse_annotations_from_html()` +2. Test with recent failed run (e.g., 20043196360) +3. Check GitHub's HTML structure for changes +4. Update "Last tested" date in module docstring + +**Monitoring recommendations:** +- Test scraper monthly with known workflow syntax error runs +- Monitor GitHub's Actions changelog for UI updates +- Keep fallback to API-first approach (scraping is secondary) + +#### Files Modified + +**New files:** +- `utils/html_scraper.py` - HTML scraping implementation +- `.claude/diagnostics/run-20043196360/ANNOTATION_API_RESEARCH.md` - API limitation research +- `.claude/diagnostics/run-20043196360/workflow-annotations-scraped.json` - Test output + +**Modified files:** +- `fetch-annotations.py` - Added HTML scraping integration, fixed imports +- `CHANGELOG.md` - This entry + +#### Success Criteria Met + +✅ Identified that GitHub API does NOT expose workflow syntax errors +✅ Researched and documented API limitations with community references +✅ Implemented HTML scraping workaround +✅ Extracted full workflow annotation messages without truncation +✅ Added appropriate warnings about scraping fragility +✅ Tested successfully with run 20043196360 +✅ Documented maintenance procedures for when scraper breaks + +#### Backward Compatibility + +✅ API-first approach preserved +✅ HTML scraping is additive (only used when needed) +✅ No breaking changes to existing utilities +✅ Output format compatible with existing diagnostic reports + +#### Future Considerations + +- Monitor GitHub's API roadmap for official workflow annotation support +- Consider contributing to GitHub Community discussion with workaround details +- Explore GitHub Actions linter integration as alternative to runtime detection +- Add HTML scraper health check to detect when GitHub changes structure + +--- + +## Version 2.3.0 - 2025-12-09 (Workflow Annotations Detection) + +### Problem Solved +The 
cicd-diagnostics skill was missing critical information when diagnosing workflow failures: **GitHub Actions workflow syntax validation errors** shown as annotations. These errors are visible in the GitHub UI but NOT in job logs, causing the skill to miss the root cause when: +- Release phases were skipped due to workflow syntax errors +- Jobs were marked "skipped" but no conditional logic explained why +- Deployment jobs never ran due to validation failures in the YAML file + +**Reference case:** Issue #34051, Run 20043196360 - Release phase skipped due to syntax error at line 132, but error only visible in annotations, not logs. + +### Solution: Workflow Annotations API Integration + +Added comprehensive workflow annotations detection to identify syntax errors, validation failures, and other workflow-level issues that prevent jobs from running. + +### Major Changes + +#### 1. GitHub API Annotation Fetching (NEW) + +**New function in `utils/github_api.py`:** +- `get_workflow_run_annotations()` - Fetches annotations via GitHub API + - Gets check suite ID from workflow run + - Retrieves all check runs for the suite + - Collects annotations from each check run + - Returns structured annotation data + +**Example annotation structure:** +```json +{ + "path": ".github/workflows/cicd_6-release.yml", + "start_line": 132, + "end_line": 132, + "start_column": 24, + "end_column": 28, + "annotation_level": "failure", + "title": "Invalid workflow file", + "message": "Unexpected value 'true'" +} +``` + +#### 2. Job State Categorization (NEW) + +**New functions in `utils/github_api.py`:** +- `get_skipped_jobs()` - Extract jobs marked as skipped +- `categorize_job_states()` - Distinguish between: + - `failed` - Jobs that ran and failed + - `skipped` - Jobs intentionally skipped (conditional logic) + - `cancelled` - Jobs that were cancelled + - `never_evaluated` - Jobs never run due to syntax errors + - `success`, `in_progress`, `queued` - Other states + +**Impact:** Can now differentiate between "skipped by design" vs "never evaluated due to error". + +#### 3. Annotation Evidence Presentation (NEW) + +**New functions in `utils/evidence.py`:** +- `present_workflow_annotations()` - Format annotations for AI analysis + - Groups by severity level (failure, warning, notice) + - Shows file path, line/column, title, and message + - Provides impact analysis explaining consequences + +- `present_job_state_analysis()` - Analyze job states in context + - Categorizes all jobs by state + - Correlates skipped jobs with syntax errors + - Flags critical finding when both exist + +- **Updated `present_complete_diagnostic()`** - Now includes workflow-level checks + - "STEP 0: WORKFLOW-LEVEL ISSUES (Check First!)" + - Annotations checked BEFORE log analysis + - Job state analysis with syntax error correlation + +#### 4. Fetch Annotations Script (NEW) + +**New file: `fetch-annotations.py`** +- CLI tool to fetch and display workflow annotations +- Same parameter order as other scripts: ` ` +- Caching support via workspace +- Clear output with severity grouping + +**Usage:** +```bash +python3 .claude/skills/cicd-diagnostics/fetch-annotations.py "$RUN_ID" "$WORKSPACE" +``` + +**Output:** +``` +⚠️ Found 1 workflow annotation(s): + + FAILURE: + .github/workflows/cicd_6-release.yml (Line: 132, Col: 24): Invalid workflow file + → Unexpected value 'true' + +💡 Workflow annotations explain why jobs may have been skipped or never evaluated. + These errors are visible in the GitHub UI but not in job logs. 
+``` + +### Documentation Updates + +#### SKILL.md Enhancements +- **Section 2:** Added annotation fetching to workflow data collection + - Emphasized: "🚨 CRITICAL: Always fetch annotations!" + - Documented when to check annotations (high priority scenarios) + - Updated parameter order documentation + +- **Evidence presentation:** Updated to include workspace parameter for annotation checking + +#### REFERENCE.md Enhancements +- **New section:** "Workflow Annotations Detection (CRITICAL)" + - What are annotations and why they matter + - Pattern indicators for annotation-related failures + - Common error types (syntax, validation, expression errors) + - When to check and how annotations affect diagnosis + +- **Analytical Methodology:** Added "Annotations-First Approach" principle + +### When to Check Annotations (HIGH Priority) + +**ALWAYS check when:** +- ✅ Jobs marked "skipped" without obvious conditional logic (`if`, `needs`) +- ✅ Expected jobs (release, deploy) missing from workflow run +- ✅ Workflow completed but didn't execute all expected jobs +- ✅ No error messages in logs despite workflow failure + +**Why it matters:** +- Jobs marked "skipped" may actually be "never evaluated due to syntax error" +- No job logs exist for jobs prevented by syntax errors +- Root cause is in workflow file, not application code +- Fix requires workflow YAML changes, not code changes + +### Common Annotation Error Types + +1. **Syntax Errors** + - Unexpected value types (`true` instead of string) + - Invalid YAML syntax (indentation, quotes) + - Unrecognized keys or properties + +2. **Validation Failures** + - Invalid job dependencies (`needs` references non-existent job) + - Invalid action references (typos in action names) + - Invalid workflow triggers + +3. **Expression Errors** + - Invalid GitHub expressions (`${{ }}` syntax) + - Undefined context variables or secrets + - Type mismatches in expressions + +### Example: Detecting Syntax Errors + +**Scenario:** Release job marked "skipped" in run 20043196360 + +**Without annotation checking:** +``` +✅ JOB STATE ANALYSIS === +⏭️ SKIPPED: 1 + - Release (ID: 12345) + +ℹ️ Jobs were skipped due to normal conditional logic (if/needs) +``` + +**With annotation checking:** +``` +=== WORKFLOW ANNOTATIONS === +🚨 CRITICAL: Found 1 workflow annotation(s) + +FAILURE (1 annotation(s)) +Annotation 1: + File: .github/workflows/cicd_6-release.yml + Location: Line 132, Col 24 + Title: Invalid workflow file + Message: Unexpected value 'true' + +⚠️ CRITICAL FINDING: SKIPPED JOBS + WORKFLOW SYNTAX ERRORS +Found 1 skipped job(s) AND workflow syntax errors. + +These jobs were likely skipped due to the syntax errors in the workflow file, +NOT due to normal conditional logic. The workflow syntax error prevented +these jobs from being evaluated at all. + +ACTION REQUIRED: +1. Review workflow annotations above for specific syntax errors +2. Fix the syntax error in the workflow YAML file +3. 
Re-run the workflow after fixing +``` + +### Testing & Validation + +The implementation can be validated with run 20043196360 (referenced in issue #34052) which exhibits: +- Release phase marked as skipped +- Syntax error at line 132 in cicd_6-release.yml +- Error visible in annotations but not in job logs + +### Backward Compatibility + +✅ All existing functionality preserved +✅ No breaking changes to utilities or APIs +✅ Annotation checking is additive - doesn't affect existing diagnostics +✅ Workspace caching extended to include annotations.json + +### Files Modified + +**New files:** +- `.claude/skills/cicd-diagnostics/fetch-annotations.py` - Annotation fetching script + +**Modified files:** +- `.claude/skills/cicd-diagnostics/utils/github_api.py` - Added annotation fetching functions +- `.claude/skills/cicd-diagnostics/utils/evidence.py` - Added annotation presentation functions +- `.claude/skills/cicd-diagnostics/SKILL.md` - Updated workflow documentation +- `.claude/skills/cicd-diagnostics/REFERENCE.md` - Added annotation detection patterns + +### Success Criteria Met + +✅ Fetch workflow run annotations via GitHub API +✅ Display syntax validation errors prominently in diagnostic reports +✅ Distinguish between jobs that failed vs were skipped vs never evaluated +✅ Add annotation checking to evidence presentation workflow +✅ Update REFERENCE.md with annotation detection patterns + +### Future Enhancements + +Potential additions for future versions: +- Annotation caching with expiration +- Historical annotation tracking to detect pattern changes +- Proactive workflow YAML validation before push +- Integration with GitHub Actions linter + +--- + +## Version 2.2.2 - 2025-11-10 (Parameter Validation Improvement) + +### Problem +The `fetch-logs.py` script's parameter validation was too simplistic, causing false positives when the workspace path ended with a run ID (e.g., `.claude/diagnostics/run-19219835536`). The validation checked if the workspace parameter was all digits, but didn't account for long run IDs appearing in valid paths. 
+ +### Solution +Improved the validation logic to distinguish between: +- **Valid workspace paths** that may contain digits (e.g., `/path/to/run-19219835536`) +- **Job IDs** that are purely numeric and typically 11+ digits long + +### Changes Made +- Updated `fetch-logs.py` line 39: Changed validation from `workspace_path.isdigit()` to `workspace_path.isdigit() and len(workspace_path) > 10` +- This allows paths containing run IDs to pass validation while still catching parameter order mistakes + +### Before +```python +if workspace_path.isdigit(): + # Would incorrectly trigger on paths like "run-19219835536" +``` + +### After +```python +if workspace_path.isdigit() and len(workspace_path) > 10: + # Only triggers on pure job IDs (11+ digits), not paths with numbers +``` + +### Impact +- **Fixed false positives** - Valid workspace paths with run IDs no longer trigger validation errors +- **Maintained error detection** - Still catches actual parameter order mistakes (e.g., swapping workspace and job ID) +- **Better user experience** - Clear error messages when parameters are truly in wrong order +- **No breaking changes** - All correct usage continues to work + +### Testing +Validated with: +- ✅ Correct order: `fetch-logs.py 19219835536 /path/to/run-19219835536 54939324205` (works) +- ✅ Wrong order detection: `fetch-logs.py /path/to/workspace 54939324205` (correctly caught) +- ✅ Path with run ID: `.claude/diagnostics/run-19219835536` (no longer false positive) + +--- + +## Version 2.2.1 - 2025-11-10 (Parameter Consistency Documentation Fix) + +### Problem +The SKILL.md documentation showed a complex Python code block for calling `fetch-logs.py`, which made it easy to confuse parameter order. The error occurred because: +- Documentation showed nested Python subprocess calls instead of direct Bash +- Parameter order wasn't emphasized clearly +- Inconsistent presentation across different scripts + +### Solution +1. **Simplified documentation** - Replaced complex Python examples with straightforward Bash commands +2. **Added parameter order emphasis** - Clearly stated "All scripts follow the same pattern: [optional]" +3. **Added error prevention tips** - Documented common error and how to fix it +4. **Consistent examples** - All three scripts now show consistent usage + +### Changes Made +- Updated SKILL.md section "3. Download Failed Job Logs" to use simple Bash syntax +- Updated SKILL.md section "2. Fetch Workflow Data" to emphasize consistent parameter order +- Added parameter order documentation and tips + +### Before +```python +# Complex Python code calling subprocess +subprocess.run([ + "python3", ".claude/skills/cicd-diagnostics/fetch-logs.py", + "19131365567", # RUN_ID + str(WORKSPACE), # WORKSPACE path + str(failed_job_id) # JOB_ID (optional) +]) +``` + +### After +```bash +# Simple, clear Bash command +python3 .claude/skills/cicd-diagnostics/fetch-logs.py \ + "$RUN_ID" \ + "$WORKSPACE" \ + 54939324205 # JOB_ID from fetch-jobs.py output +``` + +### Impact +- **No code changes required** - The actual Python scripts were already correct +- **Documentation clarity improved** - Easier to understand and use correctly +- **Error prevention** - Clear parameter order reduces mistakes +- **Consistency** - All three scripts now documented the same way + +--- + +## Version 2.2.0 - 2025-11-10 (Flexibility & AI-Driven Investigation) + +### Philosophy Change: From Checklist to Investigation + +**Problem:** Previous version (2.1.0) had numbered steps (0-10) that felt prescriptive and rigid. 
Risk of the AI following steps mechanically rather than adapting to findings. + +**Solution:** Redesigned as an adaptive, evidence-driven investigation framework. + +### Major Changes + +#### 1. Investigation Decision Tree (NEW) + +Added visual decision tree to guide investigation approach based on failure type: + +``` +Test Failure → Check code changes + Known issues +Deployment Failure → CHECK EXTERNAL ISSUES FIRST +Infrastructure Failure → Check logs + Patterns +``` + +**Decision points at key stages:** +- After evidence: External issue or internal? +- After known issues: Duplicate or new? +- After analysis: Confidence HIGH/MEDIUM/LOW? + +#### 2. Removed Rigid Step Numbers + +**Before:** +``` +### 0. Setup and Load Utilities +### 1. Identify Target +### 2. Fetch Workflow Data +... +### 10. Create Issue +``` + +**After:** +``` +## Investigation Toolkit + +Use these techniques flexibly: + +### Setup and Load Utilities (Always Start Here) +### Identify Target and Create Workspace +### Fetch Workflow Data +... +### Create Issue (if needed) +``` + +**Impact:** AI can now skip irrelevant steps, reorder techniques, and adapt depth based on findings. + +#### 3. Conditional Guidance Added + +Every major technique now has "When to use" guidance: + +**Example - Check Known Issues:** +``` +Check External Issues when evidence suggests: +- 🔴 HIGH Priority - Authentication errors + service names +- 🟡 MEDIUM Priority - Infrastructure errors + timing +- ⚪ LOW Priority - Test failures with clear assertions + +Skip external checks if: +- Test assertion failure with obvious code bug +- Known flaky test already documented +``` + +#### 4. Enhanced Key Principles + +**New Principle: Tool Selection Based on Failure Type** + +| Failure Type | Primary Tools | Skip | +|--------------|---------------|------| +| Deployment/Auth | external_issues.py, WebSearch | Deep log analysis | +| Test assertion | Code changes, test history | External checks | +| Flaky test | Run history, timing patterns | External checks | + +**Updated Principle: Adaptive Investigation Depth** + +``` +Quick Win (30 sec - 2 min) → Known issue? Clear error? +Standard Investigation (2-10 min) → Gather, hypothesize, test +Deep Dive (10+ min) → Unclear patterns, multiple theories +``` + +**Don't always do everything - Stop when confident.** + +#### 5. Natural Reporting Guidelines + +**Before:** Fixed template with 8 required sections + +**After:** Write naturally with relevant sections: +- Core sections (always): Summary, Root Cause, Evidence, Recommendations +- Optional sections: Known Issues, Timeline, Test Fingerprint (when relevant) + +**Guideline:** "A deployment authentication error doesn't need a 'Test Fingerprint' section." 
+ +### Success Criteria Updated + +**Changed focus from checklist completion to investigation quality:** + +**Investigation Quality:** +- ✅ Used adaptive investigation depth (stopped when confident) +- ✅ Let evidence guide technique selection (didn't use every tool blindly) +- ✅ Made appropriate use of external validation (when patterns suggest it) + +**Removed rigid requirements:** +- ❌ "Checked known issues" → ✅ "Assessed whether this is a known issue (when relevant)" +- ❌ "Validated external dependencies" → ✅ "Made appropriate use of external validation" + +### Examples of Improved Flexibility + +**Scenario 1: Clear Test Assertion Failure** +- **Old behavior:** Still checks external issues, runs full diagnostic +- **New behavior:** Quickly identifies code change, checks internal issues, done + +**Scenario 2: NPM Authentication Error** +- **Old behavior:** Goes through all 10 steps sequentially +- **New behavior:** Decision tree → Deployment failure → Check external FIRST → Find npm security update → Done + +**Scenario 3: Unclear Pattern** +- **Old behavior:** Might stop at step 7 without deep analysis +- **New behavior:** Recognizes low confidence → Gathers more context → Compares runs → Forms conclusion + +### Backward Compatibility + +✅ All utilities unchanged - still work the same way +✅ Evidence extraction unchanged - same quality +✅ External issue detection - still available when needed +✅ No breaking changes to existing functionality + +### Documentation Impact + +- **SKILL.md:** Complete restructure (~200 lines changed) +- **Philosophy section:** New 6-point investigation pattern +- **Decision tree:** New visual guide +- **Key Principles:** Rewritten with flexibility focus +- **Success Criteria:** Shifted from compliance to quality + +--- + +## Version 2.1.0 - 2025-11-10 + +### Major Enhancements + +#### 1. External Issue Detection (NEW) + +**Problem Solved:** Skill was missing critical external service changes (like npm security updates) that cause CI/CD failures. + +**Solution:** Added comprehensive external issue detection system. + +**New Capabilities:** +- **Automated pattern detection** for npm, Docker, GitHub Actions errors +- **Likelihood assessment** (LOW/MEDIUM/HIGH) for external causes +- **Targeted web search generation** based on error patterns +- **Service-specific checks** with direct links to status pages +- **Timeline correlation** to detect service change impacts + +**New Files:** +- `utils/external_issues.py` - External issue detection utilities + - `extract_error_indicators()` - Parse logs for external error patterns + - `generate_search_queries()` - Create targeted web searches + - `suggest_external_checks()` - Recommend which services to verify + - `format_external_issue_report()` - Generate markdown report section + +**Updated Files:** +- `SKILL.md` - Added Step 5: "Check Known Issues (Internal and External)" + - Automated detection using new utility + - Internal GitHub issue searches + - External web searches for high-likelihood issues + - Correlation analysis with red flags + +**Success Criteria Updated:** +- ✅ **Checked known issues - internal (GitHub) AND external (service changes)** +- ✅ **Validated external dependencies (npm, Docker, GitHub Actions) if relevant** +- ✅ Generated comprehensive natural report **with external context** + +#### 2. Improved Error Detection in Logs + +**Problem Solved:** NPM OTP errors and other critical deployment failures were buried under transient Docker errors. 
+ +**Solution:** Enhanced evidence extraction to prioritize and properly detect critical errors. + +**Changes to `utils/evidence.py`:** +- **Enhanced error keyword detection:** + - Added `npm ERR!`, `::error::`, `##[error]` + - Added `FAILURE:`, `Failed to`, `Cannot`, `Unable to` + +- **Smart filtering:** + - Skip false positives (`.class` files, `.jar` references) + - Distinguish between recoverable vs. fatal errors + +- **Prioritization:** + - Scan entire log (not just first 100 lines) + - Show **last 10 error groups** (final/fatal errors) + - Provide more context (10 lines vs 6 lines after error) + +- **Two-pass strategy:** + - First pass: Critical deployment/infrastructure errors + - Second pass: Test errors (if no critical errors found) + +**Before:** +``` +ERROR MESSAGES === +[Shows first 100 lines of Docker blob errors, stops] +[NPM OTP error at line 38652 never shown] +``` + +**After:** +``` +ERROR MESSAGES === +[Shows last 10 critical error groups from entire log] +[NPM OTP error properly captured and displayed] +``` + +### Bug Fixes + +1. **Path handling in Python scripts** - Scripts now work correctly when called from any directory +2. **Step numbering** - Fixed duplicate step 6, renumbered workflow steps (5-10) +3. **Evidence limit** - Increased from 100 to 150 lines to capture more context +4. **Smart file listing filter** - Fixed overly aggressive `.class` file filtering: + - **Before:** Skipped ANY line containing `.class` (would miss real errors like `ERROR: Failed to load class MyClass`) + - **After:** Only skip lines that are pure file listings (tar/zip output) without error keywords + - **Logic:** Skip line ONLY if it contains `.class` AND path pattern (`maven/dotserver`) AND NO error keywords (`ERROR:`, `FAILURE:`, `Failed`, `Exception:`) + - **Result:** Now captures real Java class loading errors while filtering file listings + +### Documentation Updates + +**README.md:** +- Added external issue detection to capabilities +- Updated examples to show external validation + +**SKILL.md:** +- Restructured diagnostic workflow (0-10 steps) +- Added detailed Step 5 with external issue checking +- Updated success criteria +- Added external_issues.py utility reference + +### Examples Added + +**NPM Security Update (November 2025):** +- Demonstrates detecting npm classic token revocation +- Shows correlation with failure timeline +- Provides migration path recommendations + +**Detection Pattern:** +``` +🔴 External Cause Likelihood: HIGH + +Indicators: +- NPM authentication errors (EOTP/ENEEDAUTH) often caused by + npm registry policy changes +- Multiple consecutive failures suggest external change + +Recommended Web Searches: +- npm EOTP authentication error November 2025 +- npm classic token revoked 2025 +``` + +### Migration Notes + +**For existing diagnostics:** +1. Re-run skill on historical failures to check for external causes +2. Update any diagnosis reports to include external validation +3. Use new utility for future diagnostics + +**No breaking changes** - All existing functionality preserved. 
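+
+For reference, a minimal sketch of the file-listing filter described in bug fix 4 above (the function name is illustrative; the actual logic lives in `utils/evidence.py`):
+
+```python
+ERROR_KEYWORDS = ("ERROR:", "FAILURE:", "Failed", "Exception:")
+
+def is_pure_file_listing(line: str) -> bool:
+    """True only for tar/zip listing lines; never skips lines with error keywords."""
+    return (
+        ".class" in line
+        and "maven/dotserver" in line
+        and not any(keyword in line for keyword in ERROR_KEYWORDS)
+    )
+```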
+ +### Testing + +Validated with: +- Run 19219835536 (nightly build failure Nov 10, 2025) +- Successfully identified npm EOTP error +- Detected npm security update as external cause +- Generated accurate timeline correlation +- Provided actionable migration recommendations + +### Future Enhancements + +Potential additions for future versions: +- Expand external_issues.py to detect more service patterns +- Add caching for web search results +- Create database of known external service changes +- Add Slack/email notifications for external issues +- Integration with service status APIs + +--- + +## Version 2.0.0 - 2025-11-07 + +Initial Python-based implementation with evidence-driven analysis. + +## Version 1.0.0 - 2025-10-15 + +Initial bash-based implementation. diff --git a/.claude/skills/cicd-diagnostics/ENHANCEMENTS.md b/.claude/skills/cicd-diagnostics/ENHANCEMENTS.md new file mode 100644 index 000000000000..53864aef4c48 --- /dev/null +++ b/.claude/skills/cicd-diagnostics/ENHANCEMENTS.md @@ -0,0 +1,351 @@ +# CI/CD Diagnostics Skill Enhancements + +**Date:** 2025-11-06 +**Status:** ✅ Tiered Extraction and Retry Analysis Complete + +--- + +## Problem Statement + +The original error extraction approach had a critical limitation: + +``` +Error: File content (33,985 tokens) exceeds maximum allowed tokens (25,000) +``` + +Even after extracting "error sections only" from an 11.5MB log file, the resulting file was still **too large to process in a single Read operation**. This made it impossible for the AI to analyze the evidence without manual chunking. + +--- + +## Solution: Tiered Evidence Extraction + +### Core Innovation + +Instead of a single extraction level, we now create **three progressively detailed levels** that allow the AI to: + +1. **Start with a quick overview** (Level 1 - always fits in context) +2. **Get detailed errors** (Level 2 - moderate detail) +3. **Deep dive if needed** (Level 3 - comprehensive context) + +### Implementation + +**New File:** `.claude/skills/cicd-diagnostics/utils/tiered-extraction.sh` + +#### Level 1: Test Summary (~1,500 tokens) +```bash +extract_level1_summary LOG_FILE OUTPUT_FILE +``` + +**Contents:** +- Overall test results (pass/fail counts) +- List of failed test names (no details) +- Retry patterns summary +- Classification hints (timeout count, assertion count, NPE count, infra errors) + +**Size:** ~6,222 bytes (~1,555 tokens) - **Always readable** + +**Use Case:** Quick triage - "What failed and why might it have failed?" + +#### Level 2: Unique Failures (~6,000 tokens) +```bash +extract_level2_unique_failures LOG_FILE OUTPUT_FILE +``` + +**Contents:** +- Deterministic failures with retry counts (4/4 failed = blocking bug) +- Flaky tests with pass/fail breakdown (2/4 failed = timing issue) +- First occurrence of each unique error type: + - ConditionTimeoutException (Awaitility failures) + - AssertionError / ComparisonFailure + - NullPointerException + - Other exceptions + +**Size:** ~24,624 bytes (~6,156 tokens) - **Fits in context** + +**Use Case:** Detailed analysis - "What's the actual error message and pattern?" 
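+
+The token estimates quoted for each level use a rough bytes-to-tokens conversion; a sketch of the arithmetic (the ~4 bytes/token divisor is inferred from the figures above):
+
+```python
+def estimate_tokens(num_bytes: int) -> int:
+    """Rough token estimate used for the level sizes above (~4 bytes/token)."""
+    return num_bytes // 4
+
+assert estimate_tokens(6_222) == 1_555   # Level 1 summary
+assert estimate_tokens(24_624) == 6_156  # Level 2 unique failures
+```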
+ +#### Level 3: Full Context (~21,000 tokens) +```bash +extract_level3_full_context LOG_FILE OUTPUT_FILE +``` + +**Contents:** +- Complete retry analysis with all attempts +- All error sections with full stack traces +- Timing correlation (errors with timestamps) +- Infrastructure events (Docker, DB, ES failures) +- Test execution timeline for failed tests + +**Size:** ~86,624 bytes (~21,656 tokens) - **Just fits in context** + +**Use Case:** Deep investigation - "Show me everything about this failure" + +### Auto-Tiered Extraction + +```bash +auto_extract_tiered LOG_FILE WORKSPACE +``` + +**Smart behavior:** +- Always creates Level 1 (summary) +- Always creates Level 2 (unique failures) +- Only creates Level 3 if log > 5MB (for complex cases) + +**Output:** +``` +=== Auto-Tiered Extraction === +Log size: 11 MB + +Creating Level 1 (Summary)... +✓ Level 1 created: 6222 bytes (~1555 tokens) + +Creating Level 2 (Unique Failures)... +✓ Level 2 created: 24624 bytes (~6156 tokens) + +Creating Level 3 (Full Context) - large log detected... +✓ Level 3 created: 86624 bytes (~21656 tokens) + +=== Tiered Extraction Complete === +Analysis workflow: +1. Read Level 1 for quick overview and classification hints +2. Read Level 2 for detailed error messages and retry patterns +3. Read Level 3 (if exists) for deep dive analysis +``` + +--- + +## Enhancement 2: Automated Retry Pattern Analysis + +### Problem + +The original diagnosis required manual analysis to distinguish: +- **Deterministic failures** (test fails 100% of the time = real bug) +- **Flaky tests** (test fails sometimes = timing/concurrency issue) + +This distinction is **critical** for proper diagnosis and prioritization. + +### Solution + +**New File:** `.claude/skills/cicd-diagnostics/utils/retry-analyzer.sh` + +```bash +analyze_simple_retry_patterns LOG_FILE +``` + +**Output:** +``` +================================================================================ +RETRY PATTERN ANALYSIS +================================================================================ + +Surefire retry mechanism detected + +=== DETERMINISTIC FAILURES (All Retries Failed) === + • com.dotcms.publisher.business.PublisherTest.autoUnpublishContent - Failed 4/4 retries (100% failure rate) + +=== FLAKY TESTS (Passed Some Retries) === + • com.dotcms.publisher.business.PublisherTest.testPushArchivedAndMultiLanguageContent - Failed 2/4 retries (50% failure rate, 2 passed) + • com.dotcms.publisher.business.PublisherTest.testPushContentWithUniqueField - Failed 2/4 retries (50% failure rate, 2 passed) + • com.dotmarketing.startup.runonce.Task240306MigrateLegacyLanguageVariablesTest.testBothFilesMapToSameLanguageWithPriorityHandling - Failed 1/2 retries (50% failure rate, 1 passed) + +=== SUMMARY === +Deterministic failures: 1 test(s) +Flaky tests: 3 test(s) +Total problematic tests: 4 + +⚠️ BLOCKING: 1 deterministic failure(s) detected + These tests failed ALL retry attempts - indicates real bugs or incomplete fixes +⚠️ WARNING: 3 flaky test(s) detected + These tests passed some retries - indicates timing/concurrency issues + +================================================================================ +``` + +### Key Benefits + +1. **Immediate Classification:** Instantly see which failures are blocking vs flaky +2. **Retry Context:** Understand failure rates (4/4 vs 2/4 tells completely different stories) +3. **Actionable Guidance:** Clear labeling of BLOCKING vs WARNING severity +4. 
**No Manual Counting:** Automatically parses Surefire retry summary format + +--- + +## Impact Assessment + +### Before Enhancements + +**Problem:** Error extraction created 80KB file (33,985 tokens) +``` +Read(.claude/diagnostics/run-19147272508/error-sections.txt) + ⎿ Error: File content (33,985 tokens) exceeds maximum allowed tokens (25,000) +``` + +**Workaround Required:** +- Manual grep commands to extract specific sections +- Multiple Read operations with offset/limit parameters +- Slow, iterative analysis +- Easy to miss critical information + +### After Enhancements + +**Solution:** Tiered extraction with guaranteed-readable sizes + +**Level 1:** 1,555 tokens - Quick overview +```bash +cat .claude/diagnostics/run-19147272508/evidence-level1-summary.txt +# Always readable, instant triage +``` + +**Level 2:** 6,156 tokens - Detailed errors +```bash +cat .claude/diagnostics/run-19147272508/evidence-level2-unique.txt +# First occurrence of each error type with context +``` + +**Level 3:** 21,656 tokens - Full context +```bash +cat .claude/diagnostics/run-19147272508/evidence-level3-full.txt +# Complete investigation details +``` + +**Retry Analysis:** Automated classification +```bash +source .claude/skills/cicd-diagnostics/utils/retry-analyzer.sh +analyze_simple_retry_patterns "$LOG_FILE" +# Instant deterministic vs flaky distinction +``` + +--- + +## Usage Examples + +### Example 1: Quick Triage (30 seconds) + +```bash +# Initialize and extract +RUN_ID=19147272508 +bash .claude/skills/cicd-diagnostics/init-diagnostic.sh "$RUN_ID" +source .claude/skills/cicd-diagnostics/utils/tiered-extraction.sh + +WORKSPACE="/path/to/.claude/diagnostics/run-$RUN_ID" +LOG_FILE="$WORKSPACE/failed-job-*.txt" + +# Create tiered extractions +auto_extract_tiered "$LOG_FILE" "$WORKSPACE" + +# Read Level 1 (always fits) +cat "$WORKSPACE/evidence-level1-summary.txt" + +# Result: Instant answer to "what failed?" 
+``` + +### Example 2: Detailed Analysis (2 minutes) + +```bash +# After Level 1 triage, read Level 2 for error details +cat "$WORKSPACE/evidence-level2-unique.txt" + +# Get retry pattern analysis +source .claude/skills/cicd-diagnostics/utils/retry-analyzer.sh +analyze_simple_retry_patterns "$LOG_FILE" + +# Result: Know exact error messages and whether failures are deterministic or flaky +``` + +### Example 3: Deep Investigation (5 minutes) + +```bash +# For complex cases, read Level 3 +cat "$WORKSPACE/evidence-level3-full.txt" + +# Result: Complete stack traces, timing correlation, infrastructure events +``` + +--- + +## Performance Comparison + +| Metric | Before | After | Improvement | +|--------|--------|-------|-------------| +| **Extraction Time** | ~5 seconds | ~5 seconds | Same | +| **File Size (error sections)** | 80KB (33,985 tokens) | Level 1: 6KB (1,555 tokens) | **95% reduction** | +| **Readability** | ❌ Too large | ✅ Always readable | **Fixed** | +| **Analysis Speed** | 5+ min (manual chunks) | 30sec - 2min (progressive) | **60-80% faster** | +| **Retry Classification** | Manual counting | Automated | **100% automation** | +| **Accuracy** | Prone to counting errors | Algorithmic parsing | **More reliable** | + +--- + +## Test Results (Run 19147272508) + +### Tiered Extraction +``` +✓ Level 1 created: 6,222 bytes (~1,555 tokens) - READABLE +✓ Level 2 created: 24,624 bytes (~6,156 tokens) - READABLE +✓ Level 3 created: 86,624 bytes (~21,656 tokens) - READABLE +``` + +### Retry Pattern Analysis +``` +✓ Correctly identified 1 deterministic failure (4/4 retries failed) +✓ Correctly identified 3 flaky tests with pass/fail breakdowns +✓ Accurate failure rate calculations (50%, 50%, 50%) +✓ Clear blocking vs warning classification +``` + +### AI Analysis Workflow +``` +1. Read Level 1 → Identified PublisherTest failures and timing issues (10 sec) +2. Read Level 2 → Saw ConditionTimeout pattern for IdentifierDateJob (30 sec) +3. Run retry analysis → Confirmed 1 deterministic, 3 flaky (5 sec) +4. Read Level 3 → Got full stack traces for deep dive (60 sec) + +Total: ~2 minutes from log download to full diagnosis +``` + +--- + +## Next Steps (Future Enhancements) + +### High Priority (Recommended by ANALYSIS_EVALUATION.md) + +1. **PR Diff Integration** + - Automatically fetch PR diff when analyzing PR failures + - Show code changes that may have caused failure + - Implementation: `fetch_pr_diff()` utility function + +2. **Background Job Execution Tracing** + - Extract logs specifically for background jobs (Quartz, IdentifierDateJob, etc.) + - Help diagnose request context issues + - Implementation: `trace_job_execution()` utility function + +3. **Automated Known Issue Search** + - Search GitHub issues for matching test names/patterns + - Instant detection of known flaky tests + - Implementation: `find_related_issues()` utility function + +### Medium Priority + +4. **Timing Correlation Analysis** + - Correlate error timestamps to detect cascades + - Identify primary vs secondary failures + - Implementation: `correlate_error_timing()` utility function + +5. 
**Infrastructure Event Detection** + - Parse Docker/DB/ES logs for root cause + - Detect environment issues vs code issues + - Implementation: `extract_infrastructure_events()` utility function + +--- + +## Conclusion + +The tiered extraction system successfully solves the "file too large" problem while providing a **better analysis workflow**: + +- ✅ **Level 1 always readable** - No more token limit errors +- ✅ **Progressive detail** - Start fast, go deep only when needed +- ✅ **Automated retry analysis** - Instant deterministic vs flaky classification +- ✅ **60-80% faster** - Less manual work, clearer insights +- ✅ **More reliable** - Algorithmic parsing vs manual counting + +**Impact:** The skill can now handle large CI/CD logs efficiently and provide instant triage, making it suitable for production use in automated diagnostics workflows. diff --git a/.claude/skills/cicd-diagnostics/ISSUE_TEMPLATE.md b/.claude/skills/cicd-diagnostics/ISSUE_TEMPLATE.md new file mode 100644 index 000000000000..9a16cc838a82 --- /dev/null +++ b/.claude/skills/cicd-diagnostics/ISSUE_TEMPLATE.md @@ -0,0 +1,510 @@ +# GitHub Issue Templates for CI/CD Failures + +Standard templates for documenting build failures. + +## Template Selection Guide + +**New Build Failure** → Use "Build Failure Report" template +**Flaky Test** → Use "Flaky Test Report" template +**Infrastructure Issue** → Use "Infrastructure Issue" template +**Add to existing issue** → Use "Failure Update Comment" template + +## Build Failure Report Template + +Use when creating a new issue for a consistent build failure. + +```markdown +## Build Failure Report + +**Workflow Run**: [workflow-name #run-id](run-url) +**Failed Job**: `job-name` +**Commit**: [`short-sha`](commit-url) - commit message +**Branch**: `branch-name` +**PR**: #pr-number (if applicable) +**Date**: YYYY-MM-DD HH:MM UTC + +### Failure Summary + +Brief description of what failed (1-2 sentences). + +### Failed Test(s) + +If test failure, list test class and method: +``` +com.dotcms.contenttype.business.ContentTypeAPIImplTest.testCreateContentType +``` + +If build failure, describe the build phase: +``` +Maven compilation phase - Java syntax error in ContentTypeResource.java +``` + +### Error Message + +``` +[Insert relevant error message] +Example: +java.lang.AssertionError: Expected content type to be created + Expected: ContentType{name='test', baseType=CONTENT} + Actual: null +``` + +### Stack Trace + +``` +[Insert relevant stack trace, focus on com.dotcms.* lines] +Example: +java.lang.NullPointerException: Cannot invoke method on null object + at com.dotcms.contenttype.business.ContentTypeAPIImpl.save(ContentTypeAPIImpl.java:456) + at com.dotcms.contenttype.business.ContentTypeAPIImplTest.testCreateContentType(ContentTypeAPIImplTest.java:123) +``` + +### Root Cause + +**Category**: [Code Change | Test Issue | Infrastructure | External Dependency] + +**Analysis**: +Explain the identified root cause with evidence (changed files, recent commits, historical pattern). + +Example: +"The failure was introduced in commit abc1234 which refactored the ContentType save logic. The test expects the save method to return the created object, but the refactored code returns null when validation fails." 
+ +### Classification + +- **Type**: [New Failure | Regression | Test Gap] +- **Introduced in**: commit-sha or "unknown" +- **First failed**: run-id and date +- **Reproducibility**: [Always | Sometimes | Once] +- **Affects workflows**: [PR | Merge Queue | Trunk | Nightly] + +### Related Changes + +Commits between last success and this failure: +- `abc1234` - Refactor ContentType API by @author (YYYY-MM-DD) +- `def5678` - Update test fixtures by @author (YYYY-MM-DD) + +### Reproduction Steps + +Steps to reproduce locally (if known): +```bash +./mvnw test -Dtest=ContentTypeAPIImplTest#testCreateContentType +``` + +Or mark as: +``` +Cannot reproduce locally - CI environment specific +``` + +### Recommendations + +1. **Immediate action**: [Specific fix or workaround] + ```bash + [Command or code snippet if applicable] + ``` + +2. **Verification**: [How to verify the fix] + ```bash + [Test command] + ``` + +3. **Prevention**: [How to prevent similar issues] + [Description] + +### Related Issues + +- Related to #issue-number +- Similar to #issue-number +- Depends on #issue-number + +### Additional Context + +[Any other relevant information: environment details, configuration, external factors] + +--- +*Generated by CI/CD Diagnostics Skill* +``` + +**Labels to add**: +- `bug` (always) +- `ci-cd` (always) +- Workflow-specific: `pr-workflow`, `merge-queue`, `trunk-workflow`, or `nightly` +- Type-specific: `test-failure`, `build-failure`, `deployment-failure` + +**gh CLI command**: +```bash +gh issue create \ + --title "[CI/CD] Brief description of failure" \ + --body "$(cat issue-body.md)" \ + --label "bug,ci-cd,pr-workflow" +``` + +## Flaky Test Report Template + +Use when documenting a test that fails intermittently. + +```markdown +## Flaky Test Report + +**Test**: `com.dotcms.package.TestClass.testMethod` +**Failure Rate**: X failures out of Y runs (Z%) +**Date Range**: YYYY-MM-DD to YYYY-MM-DD +**Workflows Affected**: [PR | Merge Queue | Nightly] + +### Failure Pattern + +**Frequency**: +- Last 30 days: X failures / Y runs (Z%) +- Last 7 days: X failures / Y runs (Z%) + +**Time pattern** (if any): +- Random failures: No time pattern detected +- OR: Tends to fail during high load / specific time of day + +**Workflow pattern**: +- Fails in: [which workflows] +- Always passes in: [which workflows] +- Pattern: [describe any pattern] + +### Example Failures + +**Recent failure 1**: +- Run: [run-name #run-id](run-url) +- Date: YYYY-MM-DD +- Error: `brief error message` + +**Recent failure 2**: +- Run: [run-name #run-id](run-url) +- Date: YYYY-MM-DD +- Error: `brief error message` + +**Recent failure 3**: +- Run: [run-name #run-id](run-url) +- Date: YYYY-MM-DD +- Error: `brief error message` + +### Error Messages + +Common error patterns seen: +``` +[Error message variant 1] +``` + +``` +[Error message variant 2] +``` + +### Suspected Root Cause + +**Hypothesis**: [Your hypothesis about why it's flaky] + +Examples: +- Race condition in async operation +- Timing dependency on external service +- Resource contention (database connections, ports) +- Non-deterministic test data +- Cleanup issue leaving state for next test + +**Evidence**: +- [Supporting evidence for hypothesis] +- [Stack trace analysis] +- [Timing information] + +### Test Code Location + +- File: `src/test/java/com/dotcms/package/TestClass.java` +- Method: `testMethod` (line XXX) +- Related code: [Files tested by this test] + +### Mitigation Options + +**Option 1: Fix the root cause** (preferred) +- [ ] Identify race condition +- [ ] Add 
proper synchronization/waiting +- [ ] Improve test isolation +- [ ] Fix cleanup issues + +**Option 2: Improve test resilience** (temporary) +- [ ] Add retry logic +- [ ] Increase timeouts +- [ ] Add explicit waits +- [ ] Improve assertions + +**Option 3: Quarantine** (last resort) +- [ ] Mark with `@Flaky` annotation +- [ ] Exclude from CI runs temporarily +- [ ] Track in separate test suite +- [ ] Create investigation task + +### Recommended Actions + +1. [Specific action 1] +2. [Specific action 2] +3. [Specific action 3] + +### Related Issues + +- Similar flaky test: #issue-number +- Related to: #issue-number + +--- +*Generated by CI/CD Diagnostics Skill* +``` + +**Labels to add**: +- `flaky-test` (always) +- `test-failure` +- `ci-cd` +- Severity: `high-priority` if >20% failure rate, `medium-priority` if 5-20%, `low-priority` if <5% + +**gh CLI command**: +```bash +gh issue create \ + --title "[Flaky Test] TestClass.testMethod - X% failure rate" \ + --body "$(cat flaky-test.md)" \ + --label "flaky-test,test-failure,ci-cd,high-priority" +``` + +## Infrastructure Issue Template + +Use for issues related to CI/CD infrastructure, not code. + +```markdown +## CI/CD Infrastructure Issue + +**Affected Workflows**: [PR | Merge Queue | Trunk | Nightly | All] +**Issue Type**: [Timeout | Connectivity | Resource | Service Outage] +**First Observed**: YYYY-MM-DD HH:MM UTC +**Status**: [Ongoing | Resolved | Intermittent] + +### Symptom + +Brief description of the infrastructure issue. + +Example: +"Multiple workflow runs timing out during Elasticsearch startup phase" + +### Affected Runs + +Recent runs experiencing this issue: +- [workflow #run-id](run-url) - YYYY-MM-DD - timeout after 15 minutes +- [workflow #run-id](run-url) - YYYY-MM-DD - connection refused +- [workflow #run-id](run-url) - YYYY-MM-DD - rate limit exceeded + +### Error Patterns + +``` +[Common error message 1] +``` + +``` +[Common error message 2] +``` + +### Investigation + +**External Service Status**: +- GitHub Actions status: [Link to status page] +- Maven Central: [Status] +- Docker Hub: [Status] +- Other services: [Status] + +**Runner Information**: +- Runner OS: [ubuntu-latest, macos-latest, etc.] +- Runner version: [if known] +- Resource limits: [if relevant] + +**Timing**: +- Time of day pattern: [if any] +- Duration of issue: [how long observed] +- Frequency: [always, intermittent, rare] + +### Root Cause + +**Identified cause** (if known): +[Description of root cause] + +**Suspected cause** (if investigating): +[Hypothesis about cause] + +### Impact + +- **Workflows blocked**: X runs failed +- **PRs affected**: Y PRs unable to merge +- **Duration**: Started YYYY-MM-DD, ongoing/resolved YYYY-MM-DD +- **Severity**: [Critical | High | Medium | Low] + +### Workaround + +**Temporary workaround** (if available): +```bash +[Commands or config changes] +``` + +Or: +``` +No workaround available - must wait for service restoration +``` + +### Resolution + +**Status**: [Investigating | Waiting for external fix | Fixed] + +**Actions taken**: +1. [Action 1] +2. [Action 2] +3. [Action 3] + +**Permanent fix** (if applicable): +[Description of fix implemented] + +### Related Issues + +- Related to #issue-number +- Duplicate of #issue-number +- External issue: [link to GitHub Actions, service status, etc.] 
+ +--- +*Generated by CI/CD Diagnostics Skill* +``` + +**Labels to add**: +- `ci-cd` +- `infrastructure` +- Severity based on impact: `critical`, `high-priority`, `medium-priority` +- Type: `timeout`, `connectivity`, `resource-constraint` + +## Failure Update Comment Template + +Use when adding information to an existing issue. + +```markdown +### Additional Failure - YYYY-MM-DD + +**Run**: [workflow #run-id](run-url) +**Commit**: `short-sha` +**Workflow**: [PR | Merge Queue | Trunk | Nightly] + +**Status**: [Same error | Slightly different | Related] + +**Error**: +``` +[Error message if different] +``` + +**Notes**: +[Any new observations or patterns] + +**Failure count**: Now X failures out of Y observed runs +``` + +**gh CLI command**: +```bash +gh issue comment ISSUE_NUMBER --body "$(cat update-comment.md)" +``` + +## Label Standards + +**Workflow labels** (one): +- `pr-workflow` - cicd_1-pr.yml +- `merge-queue` - cicd_2-merge-queue.yml +- `trunk-workflow` - cicd_3-trunk.yml +- `nightly` - cicd_4-nightly.yml + +**Type labels** (one or more): +- `test-failure` - Test failed +- `build-failure` - Compilation/build failed +- `deployment-failure` - Deployment step failed +- `flaky-test` - Intermittent test failure +- `infrastructure` - Infrastructure/external issue + +**Severity labels** (one): +- `critical` - Blocking all builds +- `high-priority` - Affecting multiple PRs/runs +- `medium-priority` - Intermittent or limited impact +- `low-priority` - Rare or minor issue + +**Always include**: +- `bug` (for failures) +- `ci-cd` (for all CI/CD issues) + +## Title Conventions + +**Build Failure**: +``` +[CI/CD] Brief description of what failed +``` +Examples: +- `[CI/CD] ContentTypeAPIImplTest.testCreate fails with NPE` +- `[CI/CD] Maven compilation error in ContentTypeResource` +- `[CI/CD] Docker build timeout in trunk workflow` + +**Flaky Test**: +``` +[Flaky Test] TestClass.testMethod - X% failure rate +``` +Examples: +- `[Flaky Test] ContentTypeAPIImplTest.testConcurrent - 15% failure rate` +- `[Flaky Test] WorkflowAPITest.testTransition - intermittent timeout` + +**Infrastructure**: +``` +[Infrastructure] Brief description of issue +``` +Examples: +- `[Infrastructure] Elasticsearch startup timeouts in nightly builds` +- `[Infrastructure] Maven Central connectivity issues` + +## Quick Issue Creation Commands + +**New build failure**: +```bash +gh issue create \ + --title "[CI/CD] Test/Build description" \ + --label "bug,ci-cd,pr-workflow,test-failure" \ + --assignee "@me" \ + --body "$(cat < failed-job.log + +# Much smaller than full archive! +``` + +### 3. 
Progressive Log Extraction + +```bash +# Download full archive +gh run download $RUN_ID --dir ./logs + +# List contents first (don't extract) +unzip -l logs.zip | head -50 + +# Identify structure +# Typical structure: +# - 1_Job Name/ +# - 2_Step Name.txt +# - 3_Another Step.txt + +# Extract ONLY failed job directory +unzip logs.zip "*/Failed Job Name/*" -d extracted/ + +# Or stream search without extracting +unzip -p logs.zip "**/[0-9]*_*.txt" | grep "pattern" | head -100 +``` + +## Pattern Matching Strategies + +### Maven Build Failures + +**Primary indicators** (check these first): +```bash +# Maven errors (most reliable) +unzip -p logs.zip "**/[0-9]*_*.txt" | grep -A 10 -B 3 "\[ERROR\]" | head -100 + +# Build failure summary +unzip -p logs.zip "**/[0-9]*_*.txt" | grep -A 20 "BUILD FAILURE" | head -100 + +# Compilation errors +unzip -p logs.zip "**/[0-9]*_*.txt" | grep -A 15 "COMPILATION ERROR" | head -50 +``` + +**What to look for**: +- `[ERROR] Failed to execute goal` - Maven plugin failures +- `[ERROR] COMPILATION ERROR` - Java compilation issues +- `[ERROR] There are test failures` - Test failures +- `[ERROR] Could not resolve dependencies` - Dependency issues + +### Test Failures + +**Test failure markers** (surefire/failsafe): +```bash +# Test failure summary +unzip -p logs.zip "**/[0-9]*_*.txt" | grep -E "Tests run:.*Failures: [1-9]" | head -20 + +# Individual test failures +unzip -p logs.zip "**/[0-9]*_*.txt" | grep -A 25 "<<< FAILURE!" | head -200 + +# Test errors (crashes) +unzip -p logs.zip "**/[0-9]*_*.txt" | grep -A 25 "<<< ERROR!" | head -200 +``` + +**Test failure structure**: +``` +[ERROR] Tests run: 150, Failures: 2, Errors: 0, Skipped: 5 +... +[ERROR] testMethodName(com.dotcms.TestClass) Time elapsed: 1.234 s <<< FAILURE! +java.lang.AssertionError: Expected X but was Y + at org.junit.Assert.fail(Assert.java:88) + at com.dotcms.TestClass.testMethodName(TestClass.java:123) +``` + +**Extract failure details**: +```bash +# Get test class and method +grep "<<< FAILURE!" logs.txt | sed 's/.*\(test[A-Za-z]*\)(\([^)]*\).*/\2.\1/' + +# Get exception type and message +grep -A 5 "<<< FAILURE!" logs.txt | grep -E "^[a-zA-Z.]*Exception|^java.lang.AssertionError" +``` + +### Stack Trace Analysis + +**Find relevant stack traces**: +```bash +# Find DotCMS code in stack traces (ignore framework) +unzip -p logs.zip "**/[0-9]*_*.txt" | \ + grep -A 50 "Exception:" | \ + grep -E "at com\.(dotcms|dotmarketing)\." | \ + head -100 +``` + +**Stack trace structure**: +``` +java.lang.NullPointerException: Cannot invoke method on null object + at com.dotcms.MyClass.myMethod(MyClass.java:456) ← Target this + at com.dotcms.OtherClass.caller(OtherClass.java:123) ← And this + at org.junit.internal.runners... ← Ignore framework + at sun.reflect... 
← Ignore JVM
+```
+
+**Priority**: Lines starting with `at com.dotcms` or `at com.dotmarketing`
+
+### Infrastructure Issues
+
+**Patterns to search**:
+```bash
+# Timeout issues
+grep -i "timeout\|timed out\|deadline exceeded" logs.txt | head -20
+
+# Connection issues
+grep -i "connection refused\|connection reset\|unable to connect" logs.txt | head -20
+
+# Rate limiting
+grep -i "rate limit\|too many requests\|429" logs.txt | head -20
+
+# Resource exhaustion
+grep -i "out of memory\|cannot allocate\|disk.*full" logs.txt | head -20
+
+# Docker issues
+grep -i "docker.*error\|failed to pull\|image not found" logs.txt | head -20
+```
+
+### Dependency Issues
+
+**Patterns**:
+```bash
+# Dependency resolution failures
+grep -i "could not resolve\|failed to resolve\|artifact not found" logs.txt | head -30
+
+# Version conflicts
+grep -i "version conflict\|duplicate\|incompatible" logs.txt | head -20
+
+# Download issues
+grep -i "failed to download\|connection to.*refused" logs.txt | head -20
+```
+
+## Test Report XML Analysis
+
+**Structure** (surefire/failsafe XML):
+```xml
+<testsuite name="com.dotcms.TestClass" tests="150" failures="2" errors="0" skipped="5">
+  <testcase name="testMethodName" classname="com.dotcms.TestClass" time="1.234">
+    <failure message="Expected X but was Y" type="java.lang.AssertionError">
+      [stack trace]
+    </failure>
+  </testcase>
+</testsuite>
+```
+
+**Parse with Read tool or xmllint**:
+```bash
+# Extract test results only
+unzip logs.zip "**/*surefire-reports/*.xml" -d test-results/
+
+# Count failures per test suite
+find test-results -name "*.xml" -exec grep -H "failures=" {} \; | grep -v 'failures="0"'
+
+# Extract failure messages
+xmllint --xpath "//failure/@message" test-results/*.xml
+```
+
+## Efficient Search Workflow
+
+### Step-by-Step Process
+
+**1. Quick Status Check (30 seconds)**:
+```bash
+gh run view $RUN_ID --json conclusion,jobs \
+  --jq '{conclusion, failed_jobs: [.jobs[] | select(.conclusion == "failure") | .name]}'
+```
+
+**2. Failed Job Details (1 minute)**:
+```bash
+gh api "/repos/dotCMS/core/actions/runs/$RUN_ID/jobs" \
+  --jq '.jobs[] | select(.conclusion == "failure") |
+    {name, failed_steps: [.steps[] | select(.conclusion == "failure") | .name]}'
+```
+
+**3. Check Test Artifacts (1 minute)**:
+```bash
+# List test result artifacts
+gh api "/repos/dotCMS/core/actions/runs/$RUN_ID/artifacts" \
+  --jq '.artifacts[] | select(.name | contains("test-results")) | {name, id, size_in_bytes}'
+
+# Download if small (< 10 MB)
+# Skip if large or expired
+```
+
+**4. Job-Specific Logs (2-3 minutes)**:
+```bash
+# Download only failed job logs
+FAILED_JOB_ID=<job-id-from-step-2>
+gh api "/repos/dotCMS/core/actions/jobs/$FAILED_JOB_ID/logs" > failed-job.log
+
+# Search for Maven errors
+grep -A 10 "\[ERROR\]" failed-job.log | head -100
+
+# Search for test failures
+grep -A 25 "<<< FAILURE!" failed-job.log | head -200
+```
+
+**5. Full Archive Analysis (5+ minutes, only if needed)**:
+```bash
+# Download full logs
+gh run download $RUN_ID --name logs --dir ./logs
+
+# List contents
+unzip -l logs/*.zip | grep -E "\.txt$" | head -50
+
+# Stream search (no extraction)
+unzip -p logs/*.zip "**/[0-9]*_*.txt" | grep -E "\[ERROR\]|<<< FAILURE!" | head -300
+```
+
+## Pattern Recognition Guide
+
+### Error Type Identification
+
+**Compilation Error**:
+```
+[ERROR] COMPILATION ERROR
+[ERROR] /path/to/File.java:[123,45] cannot find symbol
+```
+→ Code syntax error, missing import, type mismatch
+
+**Test Failure (Assertion)**:
+```
+<<< FAILURE!
+java.lang.AssertionError: expected:<X> but was:<Y>
+```
+→ Test expectation not met, code behavior changed
+
+**Test Error (Exception)**:
+```
+<<< ERROR!
+java.lang.NullPointerException + at com.dotcms.MyClass.method(MyClass.java:123) +``` +→ Unexpected exception, code defect + +**Timeout**: +``` +org.junit.runners.model.TestTimedOutException: test timed out after 30000 milliseconds +``` +→ Test hung, infinite loop, or infrastructure slow + +**Connection/Infrastructure**: +``` +java.net.ConnectException: Connection refused +Could not resolve host: repository.example.com +``` +→ Network issue, external service down, infrastructure problem + +**Dependency Issue**: +``` +[ERROR] Failed to collect dependencies +Could not resolve dependencies for project com.dotcms:dotcms-core +``` +→ Maven repository issue, version conflict, missing artifact + +## Context Window Optimization + +**Problem**: Cannot load 500 MB of logs into context + +**Solutions**: + +1. **Targeted extraction**: Get only relevant sections +```bash +# Extract just the error summary from a 500 MB log +unzip -p logs.zip "**/5_Test.txt" | \ + grep -A 50 "\[ERROR\] Tests run:" | \ + head -200 +# Result: ~10 KB instead of 500 MB +``` + +2. **Layered analysis**: + - First: Maven ERROR lines (usually < 100 lines) + - Second: Specific test failure (usually < 50 lines) + - Third: Stack trace for that test (usually < 30 lines) + - Total: ~200 lines instead of millions + +3. **Use structured data when possible**: + - XML test reports: Parse for failures only + - JSON from gh CLI: Filter with jq + - Grep with line limits: Never more than needed + +## Common Pitfalls + +❌ **Don't do this**: +```bash +# Downloads and extracts EVERYTHING (5-10 min, huge context) +gh run download $RUN_ID +unzip -q logs.zip +cat **/*.txt > all-logs.txt # 1 GB+ file +``` + +✅ **Do this instead**: +```bash +# Targeted search (30 sec, minimal context) +gh run download $RUN_ID --name logs +unzip -p logs/*.zip "**/[0-9]*_*.txt" | grep -A 10 "\[ERROR\]" | head -100 +``` + +❌ **Don't do this**: +```bash +# Read entire log file +Read: /path/to/5-Test-step.txt # 200 MB file +``` + +✅ **Do this instead**: +```bash +# Use Bash grep to extract relevant lines first +grep -A 20 "<<< FAILURE!" /path/to/5-Test-step.txt | head -200 > failures-only.txt +# Then read the small extracted file +Read: failures-only.txt # 10 KB file +``` + +## Quick Reference Commands + +### Fastest Diagnosis Commands +```bash +# 1. Which job failed? (10 sec) +gh run view $RUN_ID --json jobs --jq '.jobs[] | select(.conclusion == "failure") | .name' + +# 2. What step failed? (10 sec) +gh api "/repos/dotCMS/core/actions/runs/$RUN_ID/jobs" --jq '.jobs[] | select(.conclusion == "failure") | .steps[] | select(.conclusion == "failure") | .name' + +# 3. Get that job's logs (30 sec) +FAILED_JOB_ID=$(gh api "/repos/dotCMS/core/actions/runs/$RUN_ID/jobs" --jq '.jobs[] | select(.conclusion == "failure") | .id' | head -1) +gh api "/repos/dotCMS/core/actions/jobs/$FAILED_JOB_ID/logs" > job.log + +# 4. Find Maven errors (5 sec) +grep -A 10 "\[ERROR\]" job.log | head -100 + +# 5. Find test failures (5 sec) +grep -A 25 "<<< FAILURE!" 
job.log | head -200 +``` + +**Total time**: ~60 seconds to identify most failures + +## Log Analysis Checklist + +When analyzing logs: +- [ ] Start with job-level logs via API (fastest) +- [ ] Look for Maven `[ERROR]` markers first +- [ ] Search for test failure markers: `<<< FAILURE!`, `<<< ERROR!` +- [ ] Extract stack traces with DotCMS code only +- [ ] Check for infrastructure patterns if no code errors +- [ ] Use grep line limits (`head`, `tail`) religiously +- [ ] Only download full archive if absolutely necessary +- [ ] Never try to read entire log files without filtering \ No newline at end of file diff --git a/.claude/skills/cicd-diagnostics/README.md b/.claude/skills/cicd-diagnostics/README.md new file mode 100644 index 000000000000..a20e381da8b9 --- /dev/null +++ b/.claude/skills/cicd-diagnostics/README.md @@ -0,0 +1,274 @@ +# CI/CD Diagnostics Skill + +Expert diagnostic tool for analyzing DotCMS CI/CD build failures in GitHub Actions. + +## Skill Overview + +This skill provides automated diagnosis of CI/CD failures across all DotCMS workflows: +- **cicd_1-pr.yml** - Pull Request validation +- **cicd_2-merge-queue.yml** - Pre-merge full validation +- **cicd_3-trunk.yml** - Post-merge deployment +- **cicd_4-nightly.yml** - Scheduled full test runs + +## Capabilities + +### 🔍 Intelligent Failure Analysis +- Identifies failed jobs and steps +- Extracts relevant errors from large log files efficiently +- Classifies failures (new, flaky, infrastructure, test filtering) +- Compares workflow results (PR vs merge queue) +- Checks historical patterns across runs + +### 📊 Root Cause Determination +- New failures introduced by specific commits +- Flaky tests with failure rate calculation +- Infrastructure issues (timeouts, connectivity) +- Test filtering discrepancies between workflows +- External dependency changes + +### 🔗 GitHub Integration +- Searches existing issues for known problems +- Creates detailed GitHub issues with proper labels +- Links failures to related PRs and commits +- Provides actionable recommendations +- **HTML scraping for workflow syntax errors** (when API doesn't expose them) + +### ⚡ Efficiency Optimized +- Progressive disclosure of log analysis +- Streaming search without full extraction +- Job-specific log downloads +- Pattern-based error detection +- Context window optimized +- Direct HTML scraping for workflow annotations (GitHub API doesn't expose them) + +## Skill Structure + +``` +cicd-diagnostics/ +├── SKILL.md # Main skill instructions (concise, <300 lines) +├── WORKFLOWS.md # Detailed workflow documentation +├── LOG_ANALYSIS.md # Advanced log analysis techniques +├── ISSUE_TEMPLATE.md # GitHub issue templates +└── README.md # This file +``` + +## Usage + +The skill activates automatically when you ask questions like: + +- "Why did the build fail?" +- "Check CI/CD status" +- "Analyze run 19131365567" +- "Is ContentTypeAPIImplTest flaky?" +- "Why did my PR pass but merge queue fail?" +- "What's blocking the merge queue?" +- "Debug the nightly build failure" + +Or invoke explicitly: +```bash +/cicd-diagnostics +``` + +## Example Scenarios + +### Scenario 1: Analyze Specific Run +``` +You: "Analyze https://github.com/dotCMS/core/actions/runs/19131365567" + +Skill: +1. Extracts run ID and fetches run details +2. Identifies failed jobs and steps +3. Downloads and analyzes logs efficiently +4. Determines root cause with evidence +5. Checks for known issues +6. 
Provides actionable recommendations +``` + +### Scenario 2: Check Current PR +``` +You: "Check my PR build status" + +Skill: +1. Gets current branch name +2. Finds associated PR +3. Gets latest PR workflow runs +4. Analyzes any failures +5. Reports status and recommendations +``` + +### Scenario 3: Flaky Test Investigation +``` +You: "Is ContentTypeAPIImplTest flaky?" + +Skill: +1. Searches nightly build history +2. Counts failures vs successes +3. Calculates failure rate +4. Checks existing flaky test issues +5. Recommends action (fix vs quarantine) +``` + +### Scenario 4: Workflow Comparison +``` +You: "Why did PR pass but merge queue fail?" + +Skill: +1. Gets PR workflow results +2. Gets merge queue results for same commit +3. Identifies test filtering differences +4. Explains discrepancy +5. Recommends fixing the filtered tests +``` + +## Key Principles + +### Efficiency First +- Start with high-level status (30 sec) +- Progress to detailed logs only if needed (5+ min) +- Use streaming and filtering for large files +- Target specific patterns based on failure type + +### Workflow Context Matters +- **PR failures** → Usually code issues or filtered tests +- **Merge queue failures** → Test filtering, conflicts, or flaky tests +- **Trunk failures** → Deployment/artifact issues +- **Nightly failures** → Flaky tests or infrastructure + +### Progressive Investigation +1. Run status → Failed jobs (30 sec) +2. Maven errors → Test failures (2 min) +3. Full log analysis (5+ min, only if needed) +4. Historical comparison (2 min) +5. Issue creation (2 min, if needed) + +## Reference Files + +### SKILL.md +Main skill instructions with: +- Core workflow types +- 7-step diagnostic approach +- Key principles and efficiency tips +- Success criteria + +**Use**: Core instructions loaded when skill activates + +### WORKFLOWS.md +Detailed workflow documentation: +- Each workflow's purpose and triggers +- Common failure patterns with detection methods +- Test strategies and typical durations +- Cross-cutting failure causes +- Diagnostic decision tree + +**Use**: Reference when you need detailed workflow-specific information + +### LOG_ANALYSIS.md +Advanced log analysis techniques: +- Smart download strategies +- Pattern matching for different error types +- Efficient search workflows +- Context window optimization +- Quick reference commands + +**Use**: Reference when analyzing logs to find specific patterns efficiently + +### ISSUE_TEMPLATE.md +GitHub issue templates: +- Build Failure Report +- Flaky Test Report +- Infrastructure Issue Report +- Failure Update Comment +- Label standards and conventions + +**Use**: Reference when creating or updating GitHub issues + +## Best Practices + +### Do ✅ +- Start with job status before downloading logs +- Use streaming (`unzip -p`) for large archives +- Search for Maven `[ERROR]` first +- Check test filtering differences (PR vs merge queue) +- Compare with historical runs +- Search existing issues before creating new ones +- Provide specific, actionable recommendations + +### Don't ❌ +- Download entire log archives unnecessarily +- Try to read full logs without filtering +- Assume PR passing means all tests pass (filtering!) 
+- Create duplicate issues without searching +- Provide vague recommendations +- Ignore workflow context + +## Integration with GitHub CLI + +All commands use `gh` CLI for: +- Workflow run queries +- Job and step details +- Log downloads +- Artifact management +- Issue search and creation +- PR status checks + +**Required**: `gh` CLI installed and authenticated + +## Output Format + +Standard diagnostic report structure: +```markdown +## CI/CD Failure Diagnosis: [workflow] #[run-id] + +**Root Cause**: [Category] - [Explanation] +**Confidence**: [High/Medium/Low] + +### Failure Details +[Specific job, step, test information] + +### Classification +[Type, frequency, related issues] + +### Evidence +[Key log excerpts, commits, patterns] + +### Recommendations +[Actionable steps with commands/links] +``` + +## Success Criteria + +A successful diagnosis provides: +1. ✅ Specific failure point (job, step, test) +2. ✅ Root cause category with evidence +3. ✅ New vs recurring classification +4. ✅ Known issue status +5. ✅ Actionable recommendations +6. ✅ Issue creation if needed + +## Contributing + +When updating this skill: +1. Keep SKILL.md concise (<500 lines) +2. Move detailed content to reference files +3. Maintain one level of reference depth +4. Test with real failure scenarios +5. Update examples with actual patterns +6. Keep commands up-to-date with gh CLI + +## Version History + +- **v2.3.2** (2025-12-09) - Removed redundant API call + - Simplified to HTML scraping only (faster, clearer output) + - API provides zero unique value for workflow syntax errors + - 1-2 seconds performance improvement +- **v2.3.1** (2025-12-09) - HTML scraping for workflow syntax errors + - Added HTML scraping to detect workflow syntax validation errors not exposed by GitHub API + - Comprehensive API limitation research and documentation +- **v2.3.0** (2025-12-09) - Workflow annotations detection + - Job state categorization (skipped vs never_evaluated) + - Enhanced evidence presentation +- **v1.0** (2025-11-06) - Initial skill creation + - Four workflow support + - Progressive disclosure structure + - Efficient log analysis + - GitHub issue integration \ No newline at end of file diff --git a/.claude/skills/cicd-diagnostics/REFERENCE.md b/.claude/skills/cicd-diagnostics/REFERENCE.md new file mode 100644 index 000000000000..61b98fea5d41 --- /dev/null +++ b/.claude/skills/cicd-diagnostics/REFERENCE.md @@ -0,0 +1,657 @@ +# CI/CD Diagnostics Reference Guide + +Detailed technical expertise and diagnostic patterns for DotCMS CI/CD failure analysis. + +## Table of Contents + +1. [Core Expertise & Approach](#core-expertise--approach) +2. [Specialized Diagnostic Skills](#specialized-diagnostic-skills) +3. [Design Philosophy](#design-philosophy) +4. [Detailed Analysis Patterns](#detailed-analysis-patterns) +5. [Report Templates](#report-templates) +6. [User Collaboration Examples](#user-collaboration-examples) +7. 
[Comparison with Old Approach](#comparison-with-old-approach) + +## Core Expertise & Approach + +### Technical Depth + +**GitHub Actions:** +- Runner environments, workflow dispatch patterns, matrix builds +- Test filtering strategies, artifact propagation +- Caching strategies and optimization + +**DotCMS Architecture:** +- Java/Maven build system +- Docker containers, PostgreSQL/Elasticsearch dependencies +- Integration test infrastructure + +**Testing Frameworks:** +- JUnit 5, Postman collections, Karate scenarios, Playwright E2E tests + +**Log Analysis:** +- Efficient parsing of multi-GB logs +- Error cascade detection +- Timing correlation +- Infrastructure failure patterns + +## Specialized Diagnostic Skills + +### Timing & Race Condition Recognition + +**Clock precision issues:** +- Second-level timestamps causing non-deterministic ordering (e.g., modDate sorting failures) +- Pattern indicators: Boolean flip assertions, intermittent ordering failures + +**Test execution timing:** +- Rapid test execution causing identical timestamps +- sleep() vs Awaitility patterns +- Pattern indicators: Tests that fail faster on faster CI runners + +**Database timing:** +- Transaction isolation, commit timing +- Optimistic locking failures + +**Async operation timing:** +- Background jobs, scheduled tasks +- Publish/expire date updates + +**Cache timing:** +- TTL expiration races +- Cache invalidation timing + +### Async Testing Anti-Patterns (CRITICAL) + +**Thread.sleep() anti-pattern:** +- Fixed delays causing flaky tests (too short = intermittent failure, too long = slow tests) +- Pattern indicators: + - `Thread.sleep(1000)` or `Thread.sleep(5000)` in test code + - Intermittent failures with timing-related assertions + - Tests that fail faster on faster CI runners + - "Expected X but was Y" where Y is intermediate state + - Flakiness that increases under load or on slower machines + +**Correct Async Testing Patterns:** + +```java +// ❌ WRONG: Fixed sleep (flaky and slow) +publishContent(content); +Thread.sleep(5000); // Hope it's done by now! 
+assertTrue(isPublished(content)); + +// ✅ CORRECT: Awaitility with timeout and polling +publishContent(content); +await() + .atMost(Duration.ofSeconds(10)) + .pollInterval(Duration.ofMillis(100)) + .untilAsserted(() -> assertTrue(isPublished(content))); + +// ✅ CORRECT: With meaningful error message +await() + .atMost(10, SECONDS) + .pollDelay(100, MILLISECONDS) + .untilAsserted(() -> { + assertThat(getContentStatus(content)) + .describedAs("Content %s should be published", content.getId()) + .isEqualTo(Status.PUBLISHED); + }); + +// ✅ CORRECT: Await condition (more efficient than untilAsserted) +await() + .atMost(Duration.ofSeconds(10)) + .until(() -> isPublished(content)); +``` + +**When to recommend Awaitility:** +- Any test with `Thread.sleep()` followed by assertions +- Any test checking async operation results (publish, index, cache update) +- Any test with timing-dependent behavior +- Any test that fails intermittently with state-related assertions + +### Threading & Concurrency Issues + +**Thread safety violations:** +- Shared mutable state, non-atomic operations +- Race conditions on counters/maps + +**Deadlock patterns:** +- Circular lock dependencies +- Database connection pool exhaustion + +**Thread pool problems:** +- Executor queue overflow, thread starvation, improper shutdown + +**Quartz job context:** +- Background jobs running in separate thread pools +- Different lifecycle than HTTP requests + +**Concurrent modification:** +- ConcurrentModificationException +- Iterator failures during parallel access + +**Pattern indicators:** +- NullPointerException in background threads +- "user" is null errors +- Intermittent failures under load + +### Request Context Issues (CRITICAL for DotCMS) + +**Servlet lifecycle boundaries:** +- HTTP request/response lifecycle vs background thread execution + +**ThreadLocal anti-patterns:** +- HttpServletRequestThreadLocal accessed from Quartz jobs +- Scheduled tasks or thread pools accessing request context + +**Request object recycling:** +- Tomcat request object reuse after response completion + +**User context propagation:** +- Failure to pass User object to background operations +- Bundle publishing, permission jobs + +**Session scope leakage:** +- Session-scoped beans accessed from background threads + +**Pattern indicators:** +- `Cannot invoke "com.liferay.portal.model.User.getUserId()" because "user" is null` +- `HttpServletRequest` accessed after response completion +- NullPointerException in `PublisherQueueJob`, `IdentifierDateJob`, `CascadePermissionsJob` +- Failures in bundle publishing, content push, or scheduled background tasks + +**Common DotCMS Request Context Patterns:** + +```java +// ❌ WRONG: Accessing HTTP request in background thread (Quartz job) +User user = HttpServletRequestThreadLocal.INSTANCE.getRequest().getUser(); // NPE! 
+ +// ✅ CORRECT: Pass user context explicitly +PublisherConfig config = new PublisherConfig(); +config.setUser(systemUser); // Or user from bundle metadata +``` + +### Workflow Annotations Detection (CRITICAL) + +**What are workflow annotations?** +GitHub Actions workflow syntax validation errors that are: +- Visible in the GitHub UI but NOT in job logs +- Returned via the annotations API endpoint +- The root cause of jobs being skipped or never evaluated + +**Pattern indicators:** +- Jobs marked as "skipped" but no conditional logic (`if`, `needs`) explains it +- Workflow run shows "completed" but expected jobs didn't run +- Release phase or deployment jobs missing from run +- No error messages in job logs despite failed workflow + +**Example annotation format:** +``` +.github/workflows/cicd_6-release.yml (Line: 132, Col: 24): Unexpected value 'true' +``` + +**Common annotation error types:** +1. **Syntax Errors** + - Unexpected value types (`true` instead of string, etc.) + - Invalid YAML syntax (indentation, quotes, etc.) + - Unrecognized keys or properties + +2. **Validation Failures** + - Invalid job dependencies (`needs` references non-existent job) + - Invalid action references (typos in action names) + - Invalid workflow triggers or event configurations + +3. **Expression Errors** + - Invalid GitHub expressions (`${{ }}` syntax errors) + - Undefined context variables or secrets + - Type mismatches in expressions + +**When to check for annotations:** +- **ALWAYS check first** when analyzing workflow failures +- **CRITICAL when**: Jobs are marked "skipped" without obvious reason +- **ESSENTIAL when**: Deployment or release phases are missing from run +- **IMPORTANT when**: Workflow completed but expected jobs didn't execute + +**How annotations affect diagnosis:** +- **Jobs marked "skipped"** may actually be "never evaluated due to syntax error" +- **No job logs exist** for jobs prevented by syntax errors +- **Root cause is in workflow file**, not in application code or tests +- **Fix requires workflow YAML changes**, not code changes + +### Analytical Methodology + +1. **Progressive Investigation:** Start with high-level patterns (30s), drill down only when needed (up to 10+ min for complex issues) +2. **Evidence-Based Reasoning:** Facts are facts, hypotheses are clearly labeled as such +3. **Multiple Hypothesis Testing:** Consider competing explanations before committing to root cause +4. **Efficient Resource Use:** Extract minimal necessary log context (99%+ size reduction for large files) +5. 
**Annotations-First Approach:** Check workflow annotations BEFORE diving into job logs + +### Problem-Solving Philosophy + +- **Adaptive Intelligence:** Recognize new failure patterns without pre-programmed rules +- **Skeptical Validation:** Don't accept first obvious answer; validate through evidence +- **User Collaboration:** When multiple paths exist, present options and ask user preference +- **Fact Discipline:** Known facts labeled as facts, theories labeled as theories, confidence levels explicit + +## Design Philosophy + +This skill follows an **AI-guided, utility-assisted** approach: + +- **Utilities** handle data access, caching, and extraction (Python modules) +- **AI** (you, the senior engineer) handles pattern recognition, classification, and reasoning + +**Why this works:** +- Senior engineers excel at recognizing new patterns and explaining reasoning +- Utilities excel at fast, cached data access and log extraction +- Avoids brittle hardcoded classification logic +- Adapts to new failure modes without code changes + +## Detailed Analysis Patterns + +### Example AI Analysis + +```markdown +## Failure Analysis + +**Test**: ContentTypeCommandIT.Test_Command_Content_Filter_Order_By_modDate_Ascending +**Pattern**: Boolean flip assertion on modDate ordering +**Match**: Issue #33746 - modDate precision timing + +**Classification**: Flaky Test (High Confidence) + +**Reasoning**: +1. Test compares modDate ordering (second-level precision) +2. Assertion shows intermittent true/false flip +3. Exact match with documented issue #33746 +4. Not a functional bug (would fail consistently) + +**Fingerprint**: +- test: ContentTypeCommandIT.Test_Command_Content_Filter_Order_By_modDate_Ascending +- pattern: modDate-ordering +- assertion: boolean-flip +- line: 477 +- known-issue: #33746 + +**Recommendation**: Known flaky test tracked in #33746. Fixes in progress. +``` + +## Report Templates + +### DIAGNOSIS.md Template + +```markdown +# CI/CD Failure Diagnosis - Run {RUN_ID} + +**Analysis Date:** {DATE} +**Run URL:** {URL} +**Workflow:** {WORKFLOW_NAME} +**Event:** {EVENT_TYPE} +**Conclusion:** {CONCLUSION} +**Analyzed By:** cicd-diagnostics skill with AI-guided analysis + +--- + +## Executive Summary +[2-3 sentence overview of the failure] + +--- + +## Failure Details +[Specific failure information with line numbers and context] + +### Failed Job +- **Name:** {JOB_NAME} +- **Job ID:** {JOB_ID} +- **Duration:** {DURATION} + +### Specific Test Failure +- **Test:** {TEST_NAME} +- **Location:** Line {LINE_NUMBER} +- **Error Type:** {ERROR_TYPE} +- **Assertion:** {ASSERTION_MESSAGE} + +--- + +## Root Cause Analysis + +### Classification: **{CATEGORY}** ({CONFIDENCE} Confidence) + +### Evidence Supporting Diagnosis +[Detailed evidence-based reasoning] + +### Why This Is/Isn't a Code Defect +[Clear explanation] + +--- + +## Test Fingerprint + +**Natural Language Description:** +[Human-readable description of failure pattern] + +**Matching Criteria for Future Failures:** +[How to identify similar failures] + +--- + +## Impact Assessment + +### Severity: **{SEVERITY}** + +### Business Impact +- **Blocking:** {YES/NO} +- **False Positive:** {YES/NO} +- **Developer Friction:** {LEVEL} +- **CI/CD Reliability:** {IMPACT_DESCRIPTION} + +### Frequency Analysis +[Historical failure data] + +### Risk Assessment +[Risk levels for different categories] + +--- + +## Recommendations + +### Immediate Actions (Unblock) +1. [Specific action with command/link] + +### Short-term Solutions (Reduce Issues) +2. 
[Solution with explanation] + +### Long-term Improvements (Prevent Recurrence) +3. [Systemic improvement suggestion] + +--- + +## Related Context + +### GitHub Issues +[Related open/closed issues] + +### Recent Workflow History +[Pattern analysis from recent runs] + +### Related PR/Branch +[Context about what triggered this run] + +--- + +## Diagnostic Artifacts + +All diagnostic data saved to: `{WORKSPACE_PATH}` + +### Files Generated +- `run-metadata.json` - Workflow run metadata +- `jobs-detailed.json` - All job details +- `failed-job-*.txt` - Complete job logs +- `error-sections.txt` - Extracted error sections +- `evidence.txt` - Structured evidence +- `DIAGNOSIS.md` - This report +- `ANALYSIS_EVALUATION.md` - Skill effectiveness evaluation + +--- + +## Conclusion +[Final summary with action items] + +**Action Required:** +1. [Priority action] +2. [Follow-up action] + +**Status:** [Ready for retry | Needs code fix | Investigation needed] +``` + +### ANALYSIS_EVALUATION.md Template + +```markdown +# Skill Effectiveness Evaluation - Run {RUN_ID} + +**Purpose:** Meta-analysis of cicd-diagnostics skill performance for continuous improvement. + +--- + +## Analysis Summary + +- **Run Analyzed:** {RUN_ID} +- **Time to Diagnosis:** {DURATION} +- **Cached Data Used:** {YES/NO} +- **Evidence Size:** {LOG_SIZE} → {EXTRACTED_SIZE} +- **Classification:** {CATEGORY} ({CONFIDENCE} confidence) + +--- + +## What Worked Well + +### 1. {Category} ✅ +[Specific success with examples] + +### 2. {Category} ✅ +[Specific success with examples] + +--- + +## AI Adaptive Analysis Strengths + +The skill successfully demonstrated AI-guided analysis by: + +1. **Natural Pattern Recognition** + [How AI identified patterns without hardcoded rules] + +2. **Contextual Reasoning** + [How AI connected evidence to root cause] + +3. **Cross-Reference Synthesis** + [How AI linked to related issues/history] + +4. **Confidence Assessment** + [How AI provided reasoning for confidence level] + +5. **Comprehensive Recommendations** + [How AI generated actionable solutions] + +**Key Insight:** The AI adapted to evidence rather than following rigid rules, enabling: +- [Specific capability 1] +- [Specific capability 2] +- [Specific capability 3] + +--- + +## What Could Be Improved + +### 1. {Area for Improvement} +- **Gap:** [What was missing] +- **Impact:** [Effect on analysis] +- **Suggestion:** [Specific improvement idea] + +### 2. {Area for Improvement} +- **Gap:** [What was missing] +- **Impact:** [Effect on analysis] +- **Suggestion:** [Specific improvement idea] + +--- + +## Performance Metrics + +### Speed +- **Data Fetching:** {TIME} +- **Evidence Extraction:** {TIME} +- **AI Analysis:** {TIME} +- **Total Duration:** {TIME} +- **vs Manual Analysis:** {COMPARISON} + +### Accuracy +- **Root Cause Correct:** {YES/NO/PARTIAL} +- **Known Issue Match:** {YES/NO/PARTIAL} +- **Classification Accuracy:** {CONFIDENCE_LEVEL} + +### Completeness +- [x] Identified specific failure point +- [x] Determined root cause with reasoning +- [x] Created natural test fingerprint +- [x] Assessed frequency/history +- [x] Checked known issues +- [x] Provided actionable recommendations +- [x] Saved diagnostic artifacts + +--- + +## Design Validation + +### AI-Guided Approach ✅/❌ +[How well the evidence-driven AI analysis worked] + +### Utility Functions ✅/❌ +[How well the Python utilities performed] + +### Caching Strategy ✅/❌ +[How well the workspace caching worked] + +--- + +## Recommendations for Skill Enhancement + +### High Priority +1. 
[Specific improvement with rationale]
+2. [Specific improvement with rationale]
+
+### Medium Priority
+3. [Specific improvement with rationale]
+4. [Specific improvement with rationale]
+
+### Low Priority
+5. [Specific improvement with rationale]
+
+---
+
+## Comparison with Previous Approaches
+
+### Before (Hardcoded Logic)
+[Issues with rule-based classification]
+
+### After (AI-Guided)
+[Benefits of evidence-driven analysis]
+
+### Impact
+- **Accuracy:** [Improvement]
+- **Flexibility:** [Improvement]
+- **Maintainability:** [Improvement]
+
+---
+
+## Conclusion
+
+[Overall assessment of skill effectiveness]
+
+**Key Strengths:**
+- [Strength 1]
+- [Strength 2]
+- [Strength 3]
+
+**Areas for Growth:**
+- [Area 1]
+- [Area 2]
+
+**Ready for production use:** {YES/NO}
+**Recommended next steps:** [Action items]
+```
+
+## User Collaboration Examples
+
+### When to Ask for User Input:
+
+**1. Multiple plausible root causes** with similar evidence weight:
+```
+I've identified two equally plausible explanations:
+
+1. **Test filtering discrepancy** - Test may be filtered in PR but runs in merge queue
+2. **Environmental timing issue** - Race condition in test setup
+
+Would you like me to:
+A) Deep dive into test filtering configuration (5 min analysis)
+B) Analyze test timing patterns across recent runs (5 min analysis)
+C) Investigate both in parallel (10 min analysis)
+```
+
+**2. Insufficient information** requiring deeper investigation:
+```
+**FACT**: Test failed with NullPointerException at line 234
+**HYPOTHESIS**: Could be either (a) data initialization race or (b) mock configuration issue
+**NEED**: Additional log context around test setup (lines 200-240)
+
+Would you like me to extract and analyze the full setup context? This will add ~2 min.
+```
+
+**3. Trade-offs between investigation paths:**
+```
+I can either:
+- **Quick path** (2 min): Verify this matches known flaky test pattern → recommend retry
+- **Thorough path** (10 min): Analyze why test is flaky → identify potential fix
+
+What's your priority: unblock immediately or understand root cause?
+```
+
+**4. Recommendation requires user context:**
+```
+This appears to be a genuine code defect in the new pagination logic.
+
+Options:
+1. Revert PR and investigate offline
+2. Push fix commit to existing PR
+3. Merge with known issue and create follow-up
+
+What's the team's current priority: stability or feature velocity?
+```
+
+## Comparison with Old Approach
+
+### Before (Hardcoded Logic)
+
+```bash
+# 100+ lines of pattern matching
+detect_flaky_patterns() {
+  if grep -qi "modDate"; then indicators+=("date_ordering"); fi
+  if grep -E "expected:<.*> but was:<.*>"; then indicators+=("boolean_flip"); fi
+  # ... 20 more hardcoded rules
+}
+
+classify_root_cause() {
+  if [ "$has_known_issue" = true ]; then category="flaky_test"; fi
+  # ... 50 more lines of brittle logic
+}
+```
+
+**Problems:**
+- Misses new patterns
+- Can't explain reasoning
+- Hard to maintain
+- macOS incompatible
+
+### After (AI-Guided)
+
+```python
+# Present evidence to AI
+evidence = present_complete_diagnostic(log_file)
+
+# AI analyzes and explains:
+# "This is ContentTypeCommandIT with modDate ordering (line 477),
+# boolean flip assertion, matching known issue #33746.
+# Classification: Flaky Test (high confidence)" +``` + +**Benefits:** +- Recognizes new patterns +- Explains reasoning clearly +- Easy to maintain +- Works on all platforms +- More accurate + +## Additional Context + +For more information: +- [WORKFLOWS.md](WORKFLOWS.md) - Detailed workflow descriptions and failure patterns +- [LOG_ANALYSIS.md](LOG_ANALYSIS.md) - Advanced log analysis techniques +- [utils/README.md](utils/README.md) - Utility function reference +- [ISSUE_TEMPLATE.md](ISSUE_TEMPLATE.md) - Issue creation template + + diff --git a/.claude/skills/cicd-diagnostics/SKILL.md b/.claude/skills/cicd-diagnostics/SKILL.md new file mode 100644 index 000000000000..7141cc19e9bc --- /dev/null +++ b/.claude/skills/cicd-diagnostics/SKILL.md @@ -0,0 +1,797 @@ +--- +name: cicd-diagnostics +description: Diagnoses DotCMS GitHub Actions failures (PR builds, merge queue, nightly, trunk). Analyzes failed tests, root causes, compares runs. Use for "fails in GitHub", "merge queue failure", "PR build failed", "nightly build issue". +version: 2.2.0 +dependencies: python>=3.8 +--- + +# CI/CD Build Diagnostics + +**Persona: Senior Platform Engineer - CI/CD Specialist** + +You are an experienced platform engineer specializing in DotCMS CI/CD failure diagnosis. See [REFERENCE.md](REFERENCE.md) for detailed technical expertise and diagnostic patterns. + +## Core Workflow Types + +- **cicd_1-pr.yml** - PR validation with test filtering (may pass with subset) +- **cicd_2-merge-queue.yml** - Full test suite before merge (catches filtered tests) +- **cicd_3-trunk.yml** - Post-merge deployment (uses artifacts, no test re-run) +- **cicd_4-nightly.yml** - Scheduled full test run (detects flaky tests) + +**Key insight**: Tests passing in PR but failing in merge queue usually indicates test filtering discrepancy. + +## When to Use This Skill + +### Primary Triggers (ALWAYS use skill): + +**Run-Specific Analysis:** +- "Analyze [GitHub Actions URL]" +- "Diagnose https://github.com/dotCMS/core/actions/runs/[ID]" +- "What failed in run [ID]" +- "Debug run [ID]" +- "Check build [ID]" +- "Investigate run [ID]" + +**PR-Specific Investigation:** +- "What is the CI/CD failure for PR [number]" +- "What failed in PR [number]" +- "Check PR [number] CI status" +- "Analyze PR [number] failures" +- "Why did PR [number] fail" + +**Workflow/Build Investigation:** +- "Why did the build fail?" +- "What's wrong with the CI?" +- "Check CI/CD status" +- "Debug [workflow-name] failure" +- "What's failing in CI?" + +**Comparative Analysis:** +- "Why did PR pass but merge queue fail?" +- "Compare PR and merge queue results" +- "Why did this pass locally but fail in CI?" + +**Flaky Test Investigation:** +- "Is [test] flaky?" +- "Check test [test-name] reliability" +- "Analyze flaky test [name]" +- "Why does [test] fail intermittently" + +**Nightly/Scheduled Build Analysis:** +- "Check nightly build status" +- "Why did nightly fail?" +- "Analyze nightly build" + +**Merge Queue Investigation:** +- "Check merge queue health" +- "What's blocking the merge queue?" +- "Why is merge queue failing?" 
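+
+For the run-specific triggers above, the run ID is simply the last path segment of the Actions URL. A hypothetical one-liner (not one of the skill's utilities) for pulling it out:
+
+```bash
+# Extract the run ID from a GitHub Actions run URL
+URL="https://github.com/dotCMS/core/actions/runs/19131365567"
+RUN_ID="${URL##*/}"   # strips everything through the last "/"
+echo "$RUN_ID"        # 19131365567
+```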
+ +### Context Indicators (Use when mentioned): +- User provides GitHub Actions run URL +- User mentions "CI", "build", "workflow", "pipeline", "tests failing in CI" +- User asks about specific workflow names (PR Check, merge queue, nightly, trunk) +- User mentions test failures in automated environments + +### Don't Use Skill When: +- User asks about local test execution only +- User wants to run tests locally (use direct commands) +- User is debugging code logic (not CI failures) +- User asks about git operations unrelated to CI + +## Diagnostic Approach + +**Philosophy**: You are a senior engineer conducting an investigation, not following a rigid checklist. Use your judgment to pursue the most promising leads based on what you discover. The steps below are tools and techniques, not a mandatory sequence. + +**Core Investigation Pattern**: +1. **Understand the context** - What failed? When? How often? +2. **Gather evidence** - Logs, errors, timeline, patterns +3. **Form hypotheses** - What are the possible causes? +4. **Test hypotheses** - Which evidence supports/refutes each? +5. **Draw conclusions** - Root cause with confidence level +6. **Provide recommendations** - How to fix, prevent, or investigate further + +--- + +## Investigation Decision Tree + +**Use this to guide your investigation approach based on initial findings:** + +``` +Start → Identify what failed → Gather evidence → What type of failure? + +├─ Test Failure? +│ ├─ Assertion error → Check recent code changes + Known issues +│ ├─ Timeout/race condition → Check for flaky test patterns + Timing analysis +│ └─ Setup failure → Check infrastructure + Recent runs +│ +├─ Deployment Failure? +│ ├─ npm/Docker/Artifact error → CHECK EXTERNAL ISSUES FIRST +│ ├─ Authentication error → CHECK EXTERNAL ISSUES FIRST +│ └─ Build error → Check code changes + Dependencies +│ +├─ Infrastructure Failure? +│ ├─ Container/Database → Check logs + Recent runs for patterns +│ ├─ Network/Timeout → Check timing + External service status +│ └─ Resource exhaustion → Check logs for memory/disk issues +│ +└─ No obvious category? + → Gather more evidence → Present complete diagnostic → AI analysis +``` + +**Key Decision Points:** + +1. **After gathering evidence** → Does this look like external service issue? + - YES → Run external_issues.py, check service status, search web + - NO → Focus on code changes, test patterns, internal issues + +2. **After checking known issues** → Is this a duplicate? + - YES → Link to existing issue, assess if new information + - NO → Continue investigation + +3. **After initial analysis** → Confidence level? + - HIGH → Write diagnosis, create issue if needed + - MEDIUM/LOW → Gather more context, compare runs, deep dive logs + +--- + +## Investigation Toolkit + +Use these techniques flexibly based on your decision tree path: + +### Setup and Load Utilities (Always Start Here) + +**CRITICAL**: All commands must run from repository root. Never use `cd` to change directories. + +**CRITICAL**: This skill uses Python 3.8+ for all utility scripts. Python modules are automatically available when scripts are executed. 
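+
+As a quick preflight, a minimal sketch along these lines (assuming the skill lives at `.claude/skills/cicd-diagnostics/`) can confirm both requirements before running any utility:
+
+```bash
+# Verify we are at the repository root (the skill directory must be visible)
+test -d .claude/skills/cicd-diagnostics || { echo "Run this from the repository root"; exit 1; }
+
+# Verify Python 3.8+ (the minimum version this skill declares)
+python3 -c 'import sys; assert sys.version_info >= (3, 8)' \
+  && echo "Python OK" || echo "Python 3.8+ required"
+```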
+
+**🚨 CRITICAL - SCRIPT PARAMETER ORDER 🚨**
+
+**ALL fetch-*.py scripts use the SAME parameter order:**
+
+```
+fetch-metadata.py <RUN_ID> <WORKSPACE>
+fetch-jobs.py <RUN_ID> <WORKSPACE>
+fetch-logs.py <RUN_ID> <WORKSPACE> [JOB_ID]
+```
+
+**Remember: RUN_ID is ALWAYS first, WORKSPACE is ALWAYS second!**
+
+Initialize the diagnostic workspace:
+
+```bash
+# Use the Python init script to set up workspace
+RUN_ID=19131365567
+python3 .claude/skills/cicd-diagnostics/init-diagnostic.py "$RUN_ID"
+# Outputs: WORKSPACE=/path/to/.claude/diagnostics/run-{RUN_ID}
+
+# IMPORTANT: Extract and set WORKSPACE variable from output
+WORKSPACE="/Users/stevebolton/git/core2/.claude/diagnostics/run-${RUN_ID}"
+```
+
+**Available Python utilities** (imported automatically):
+- **workspace.py** - Diagnostic workspace with automatic caching
+- **github_api.py** - GitHub API wrappers for runs/jobs/logs
+- **evidence.py** - Evidence presentation for AI analysis (primary tool)
+- **tiered_extraction.py** - Tiered log extraction (Level 1/2/3)
+
+All utilities use Python standard library and GitHub CLI (gh). No external Python packages required.
+
+### 1. Identify Target and Create Workspace
+
+**Extract run ID from URL or PR:**
+
+```bash
+# From URL: https://github.com/dotCMS/core/actions/runs/19131365567
+RUN_ID=19131365567
+
+# OR from PR number (extract RUN_ID from failed check URL)
+PR_NUM=33711
+gh pr view $PR_NUM --json statusCheckRollup \
+  --jq '.statusCheckRollup[] | select(.conclusion == "FAILURE") | .detailsUrl' | head -1
+# Extract RUN_ID from the URL output
+
+# Workspace already created by init script in step 0
+WORKSPACE="/Users/stevebolton/git/core2/.claude/diagnostics/run-${RUN_ID}"
+```
+
+### 2. Fetch Workflow Data (with caching)
+
+**Use Python helper scripts - remember: RUN_ID first, WORKSPACE second:**
+
+```bash
+# ✅ CORRECT PARAMETER ORDER:
+
+# Example values for reference:
+# RUN_ID=19131365567
+# WORKSPACE="/Users/stevebolton/git/core2/.claude/diagnostics/run-19131365567"
+
+# Fetch metadata (uses caching)
+python3 .claude/skills/cicd-diagnostics/fetch-metadata.py "$RUN_ID" "$WORKSPACE"
+#                                                          ^^^^^^^^  ^^^^^^^^^^^^
+#                                                          FIRST     SECOND
+
+# Fetch jobs (uses caching)
+python3 .claude/skills/cicd-diagnostics/fetch-jobs.py "$RUN_ID" "$WORKSPACE"
+#                                                      ^^^^^^^^  ^^^^^^^^^^^^
+#                                                      FIRST     SECOND
+
+# 🚨 NEW: Fetch workflow annotations (CRITICAL - check first!)
+python3 .claude/skills/cicd-diagnostics/fetch-annotations.py "$RUN_ID" "$WORKSPACE" +# ^^^^^^^^ ^^^^^^^^^^ +# FIRST SECOND + +# Set file paths +METADATA="$WORKSPACE/run-metadata.json" +JOBS="$WORKSPACE/jobs-detailed.json" +ANNOTATIONS="$WORKSPACE/annotations.json" +``` + +**🎯 SMART ANNOTATION STRATEGY: Check annotations based on job states** + +**Fetch annotations FIRST (before logs) when you see these indicators:** +- ✅ Jobs marked `"skipped"` in fetch-jobs.py output (check for `if:` conditions) +- ✅ Expected jobs (release, deploy) completely missing from workflow run +- ✅ Workflow shows "completed" but didn't execute all expected phases +- ✅ Job conclusion is `"startup_failure"` or `"action_required"` (not `"failure"`) +- ✅ No obvious error messages in initial metadata review + +**Skip annotations (go straight to logs) when you see:** +- ❌ All expected jobs ran and failed (conclusion: `"failure"` with logs available) +- ❌ Clear test failures or build errors visible in job summaries +- ❌ Authentication/infrastructure errors already apparent in metadata +- ❌ Obvious root cause already identified (e.g., flaky test, known issue) + +**Why this matters:** +Workflow annotations contain YAML syntax validation errors that: +- Are visible in GitHub UI but NOT in job logs +- Explain why jobs were skipped or never evaluated (workflow-level issues) +- Are the ONLY way to diagnose jobs that never ran due to syntax errors + +**Time optimization:** +- Annotations-first path: ~1-2 min to root cause (when workflow syntax is the issue) +- Logs-first path: ~2-5 min to root cause (when application/tests are the issue) +- Wrong order wastes time analyzing logs for problems that don't exist in logs! + +### 3. Download Failed Job Logs + +The fetch-jobs.py script displays failed job IDs. Use those to download logs: + +```bash +# ✅ CORRECT PARAMETER ORDER: [JOB_ID] + +# Example values for reference: +# RUN_ID=19131365567 +# WORKSPACE="/Users/stevebolton/git/core2/.claude/diagnostics/run-19131365567" +# FAILED_JOB_ID=54939324205 + +# Download logs for specific failed job +python3 .claude/skills/cicd-diagnostics/fetch-logs.py "$RUN_ID" "$WORKSPACE" "$FAILED_JOB_ID" +# ^^^^^^^^ ^^^^^^^^^^ ^^^^^^^^^^^^^^^ +# FIRST SECOND THIRD (optional) + +# Or download all failed job logs (omit JOB_ID) +python3 .claude/skills/cicd-diagnostics/fetch-logs.py "$RUN_ID" "$WORKSPACE" +``` + +**❌ COMMON MISTAKES TO AVOID:** + +```bash +# ❌ WRONG - Missing RUN_ID (only 2 params when you need 3) +python3 .claude/skills/cicd-diagnostics/fetch-logs.py "$WORKSPACE" "$FAILED_JOB_ID" + +# ❌ WRONG - Swapped RUN_ID and WORKSPACE +python3 .claude/skills/cicd-diagnostics/fetch-logs.py "$WORKSPACE" "$RUN_ID" "$FAILED_JOB_ID" + +# ❌ WRONG - Job ID in second position +python3 .claude/skills/cicd-diagnostics/fetch-logs.py "$RUN_ID" "$FAILED_JOB_ID" "$WORKSPACE" +``` + +**Parameter order**: RUN_ID, WORKSPACE, JOB_ID (optional) +- If you get "WORKSPACE parameter appears to be a job ID" error, you likely forgot RUN_ID or swapped parameters +- All three scripts (fetch-metadata.py, fetch-jobs.py, fetch-logs.py) use the same order +- **Mnemonic: Think "Run → Where → What" (Run ID → Workspace → Job ID)** + +### 4. Present Evidence to AI (KEY STEP!) 
### 4. Present Evidence to AI (KEY STEP!)

**This is where AI-guided analysis begins.** Use the Python `evidence.py` module to present raw data:

```python
from pathlib import Path
import sys
sys.path.insert(0, str(Path(".claude/skills/cicd-diagnostics/utils")))

from evidence import (
    get_log_stats, extract_error_sections_only,
    present_complete_diagnostic
)

# Use actual values from your workspace (replace with your IDs)
RUN_ID = "19131365567"
FAILED_JOB_ID = "54939324205"
WORKSPACE = Path(f".claude/diagnostics/run-{RUN_ID}")  # relative to the repo root
LOG_FILE = WORKSPACE / f"failed-job-{FAILED_JOB_ID}.txt"

# Check log size first
print(get_log_stats(LOG_FILE))

# For large logs (>10MB), extract error sections only
if LOG_FILE.stat().st_size > 10485760:
    print("Large log detected - extracting error sections...")
    ERROR_FILE = WORKSPACE / "error-sections.txt"
    extract_error_sections_only(LOG_FILE, ERROR_FILE)
    LOG_TO_ANALYZE = ERROR_FILE
else:
    LOG_TO_ANALYZE = LOG_FILE

# Present the complete evidence package
evidence = present_complete_diagnostic(LOG_TO_ANALYZE)
(WORKSPACE / "evidence.txt").write_text(evidence)

# Display evidence for AI analysis
print(evidence)
```

**What this shows:**
- Failed tests (JUnit, E2E, Postman)
- Error messages with context
- Assertion failures (expected vs actual)
- Stack traces
- Timing indicators (timeouts, race conditions)
- Infrastructure indicators (Docker, DB, ES)
- First error context (for cascade detection)
- Failure timeline
- Known issues matching the test name

### 5. Check Known Issues (Guided by Evidence)

**Decision Point: When should you check for known issues?**

**Check internal GitHub issues when:**
- The error message/test name suggests a known pattern
- After identifying the failure type (test, deployment, infrastructure)
- A quick search can save deep analysis time

**Check external issues when evidence suggests:**
- 🔴 **HIGH Priority** - Authentication errors + service names (npm, Docker, GitHub)
- 🟡 **MEDIUM Priority** - Infrastructure errors + timing correlation
- ⚪ **LOW Priority** - Test failures with clear assertions

**Skip external checks if:**
- Test assertion failure with an obvious code bug
- Known flaky test already documented
- Recent PR introduced a clear breaking change
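When the indicators above warrant a check, the `evidence.py` helpers bundle the internal searches described in subsection B below. A minimal usage sketch (the log file name is illustrative):

```python
import sys
from pathlib import Path
sys.path.insert(0, str(Path(".claude/skills/cicd-diagnostics/utils")))

from evidence import extract_test_name, extract_error_keywords, present_known_issues

LOG_FILE = Path(".claude/diagnostics/run-19131365567/failed-job-54939324205.txt")

# Pull the failing test name and error keywords out of the evidence
test_name = extract_test_name(LOG_FILE)
keywords = extract_error_keywords(LOG_FILE)

# Run the exact/class/pattern search strategies and print the formatted report
print(present_known_issues(test_name, keywords))
```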
#### A. Automated External Issue Detection (Use When Warranted)

**The external_issues.py utility helps decide if external investigation is needed:**

```python
from pathlib import Path
import sys
sys.path.insert(0, str(Path(".claude/skills/cicd-diagnostics/utils")))

from external_issues import (
    extract_error_indicators,
    generate_search_queries,
    suggest_external_checks,
    format_external_issue_report
)

# Note: use a real path here - Python does not expand shell variables like $WORKSPACE
WORKSPACE = Path(".claude/diagnostics/run-19131365567")
LOG_FILE = WORKSPACE / "failed-job-12345.txt"
log_content = LOG_FILE.read_text(encoding='utf-8', errors='ignore')

# Extract error patterns
indicators = extract_error_indicators(log_content)

# Generate targeted search queries
search_queries = generate_search_queries(indicators, "2025-11-10")

# Get specific recommendations
recent_runs = [
    ("2025-11-10", "failure"),
    ("2025-11-09", "failure"),
    ("2025-11-08", "failure"),
    ("2025-11-07", "failure"),
    ("2025-11-06", "success")
]
suggestions = suggest_external_checks(indicators, recent_runs)

# Print the formatted report
print(format_external_issue_report(indicators, search_queries, suggestions))
```

**This utility automatically:**
- Detects npm, Docker, and GitHub Actions errors
- Identifies authentication/token issues
- Assesses the likelihood of an external cause (LOW/MEDIUM/HIGH)
- Generates targeted web search queries
- Suggests specific external sources to check

#### B. Search Internal GitHub Issues

```bash
# Search for error-specific keywords from the evidence
gh issue list --search "npm ERR" --state all --limit 10 --json number,title,state,createdAt,labels

# Search for component-specific issues
gh issue list --search "docker build" --state all --limit 10
gh issue list --label "ci-cd" --state all --limit 20

# Look for recently closed issues (may have resurfaced)
gh issue list --search "authentication token" --state closed --limit 10
```

**Pattern matching:**
- Extract key error codes (e.g., `EOTP`, `ENEEDAUTH`, `ERR_CONNECTION_REFUSED`)
- Search for component names (e.g., `npm`, `docker`, `elasticsearch`)
- Look for similar failure patterns in issue descriptions

#### C. Execute Web Searches for High-Likelihood External Issues

**When the utility suggests a HIGH likelihood of an external cause:**

Use the generated search queries from step A with the WebSearch tool:

```python
# Execute top priority searches
for query in search_queries[:3]:  # Top 3 most relevant
    print(f"\n🔍 Searching: {query}\n")
    # Use the WebSearch tool with the query
```

**Key external sources to check:**
1. **npm registry**: https://github.blog/changelog/ (search: "npm security token")
2. **GitHub Actions status**: https://www.githubstatus.com/
3. **Docker Hub status**: https://status.docker.com/
4. **Service changelogs**: Check breaking changes in major versions

**When to use WebFetch:**
- To read specific changelog pages identified by searches
- To validate exact dates of service changes
- To get detailed migration instructions

```python
# Example: Fetch npm security update details
WebFetch(
    url="https://github.blog/changelog/2025-11-05-npm-security-update...",
    prompt="Extract the key dates, changes to npm tokens, and impact on CI/CD workflows"
)
```
#### D. Correlation Analysis

**Red flags for external issues:**
- ✅ Failure started on a specific date with no code changes
- ✅ Error mentions an external service (npm, Docker Hub, GitHub)
- ✅ Authentication/authorization errors
- ✅ Multiple unrelated projects affected (search reveals community reports)
- ✅ Error message suggests a policy change ("requires 2FA", "token expired")

**Document findings:**
```markdown
## Known Issues

### Internal (dotCMS Repository)
- Issue #XXXXX: Similar error, status, resolution

### External (Service Provider Changes)
- Service:
- Change Date:
- Impact:
- Source:
- Timeline:
```

### 6. Senior Engineer Analysis (Evidence-Based Reasoning)

**As a senior engineer, analyze the evidence systematically:**

#### A. Initial Hypothesis Generation
Consider **multiple competing hypotheses**:
- **Code Defect** - New bug introduced by recent changes?
- **Flaky Test - Timing Issue** - Race condition, clock precision, async timing?
- **Flaky Test - Concurrency Issue** - Thread safety violation, deadlock, shared state?
- **Request Context Issue** - ThreadLocal accessed from a background thread? User null in a Quartz job?
- **Infrastructure Issue** - Docker/DB/ES environment problem?
- **Test Filtering** - PR test subset passed, full merge queue suite failed?
- **Cascading Failure** - Primary error triggering secondary failures?

**Apply a specialized diagnostic lens** (see [REFERENCE.md](REFERENCE.md) for detailed patterns):
- Look for timing patterns: identical timestamps, boolean flips, ordering failures
- Check thread context: background jobs (Quartz), async operations, thread pool execution
- Identify request lifecycle: HTTP request boundary vs background execution
- Examine concurrency: shared state, locks, atomic operations

#### B. Evidence Evaluation
For each hypothesis, assess supporting/contradicting evidence:
- **FACT**: What the logs definitively show (error messages, line numbers, stack traces)
- **HYPOTHESIS**: What this might indicate (must be labeled as theory)
- **CONFIDENCE**: How certain you are (High/Medium/Low with reasoning)

#### C. Differential Diagnosis
Apply systematic elimination (a Python sketch of the run-history check appears just before subsection E):
1. Check recent code changes vs the failure (correlation ≠ causation)
2. Search known issues for matching patterns (exact matches = high confidence)
3. Analyze recent run history (consistent vs intermittent)
4. Examine error timing and cascades (primary vs secondary failures)

#### D. Log Context Extraction (Efficient)
**For large logs (>10MB):**
- Extract only relevant error sections (99%+ reduction)
- Identify specific line numbers and context (±10 lines)
- Note timing patterns (timestamps show cascade vs independent)
- Track infrastructure events (Docker, DB connections, ES indices)

**When you need more context from logs:**
```python
from pathlib import Path
import re

# Note: use a real path here - Python does not expand shell variables like $WORKSPACE
LOG_FILE = Path(".claude/diagnostics/run-19131365567/failed-job-12345.txt")
lines = LOG_FILE.read_text(encoding='utf-8', errors='ignore').split('\n')

# Extract specific context around an error (lines 450-480)
print('\n'.join(lines[449:480]))

# Search for related errors by pattern (first 20 matches)
matches = 0
for i, line in enumerate(lines, 1):
    if "ContentTypeCommandIT" in line:
        print(f"{i}: {line}")
        matches += 1
        if matches >= 20:
            break

# Get timing correlation for cascade analysis
timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}')
for line in lines[:50]:
    if timestamp_pattern.match(line) and ("ERROR" in line or "FAILURE" in line):
        print(line)
```
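A minimal sketch of the run-history check from step C, using `get_recent_runs` from `github_api.py` (the `createdAt`/`headSha` field names assume the gh run list JSON used elsewhere in this skill; the heuristic itself is illustrative):

```python
import sys
from pathlib import Path
sys.path.insert(0, str(Path(".claude/skills/cicd-diagnostics/utils")))

from github_api import get_recent_runs

# Recent runs for the failing workflow, newest first
runs = get_recent_runs("cicd_4-nightly.yml", 20)

# Walk newest → oldest and find the most recent success
for i, run in enumerate(runs):
    if run.get("conclusion") == "success":
        print(f"Last success: {run.get('createdAt')} ({str(run.get('headSha', ''))[:8]})")
        if i > 0:
            # runs[i - 1] is the run that immediately followed that success
            first_fail = runs[i - 1]
            print(f"First failure after it: {first_fail.get('createdAt')} ({str(first_fail.get('headSha', ''))[:8]})")
            if first_fail.get("headSha") == run.get("headSha"):
                print("Same commit passed then failed → external/infrastructure suspicion rises")
        break
else:
    print("No success in the window - consistent failure, look for the introducing change")
```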
#### E. Final Classification
Provide an evidence-based conclusion:

1. **Root Cause Classification**
   - Category: New failure / Flaky test / Infrastructure / Test filtering
   - Confidence: High / Medium / Low (with reasoning)
   - Competing hypotheses considered and why they were rejected

2. **Test Fingerprint** (natural language)
   - Test name and exact location (file:line)
   - Failure pattern (assertion type, timing characteristics, error signature)
   - Key identifiers for matching similar failures

3. **Known Issue Matching**
   - Exact matches with open GitHub issues
   - Pattern matches with documented flaky tests
   - If no match: clearly state "No known issue found"

4. **Impact Assessment**
   - Blocking status (is this blocking merge/deploy?)
   - False positive likelihood (should a retry help?)
   - Frequency analysis (first occurrence vs recurring)
   - Developer friction impact

### 7. Get Additional Context (if needed)

**For comparative analysis or frequency checks:**

```python
import sys
from pathlib import Path
sys.path.insert(0, str(Path(".claude/skills/cicd-diagnostics/utils")))

from evidence import present_recent_runs
from github_api import get_recent_runs
import json

# Note: use a real path here - Python does not expand shell variables like $WORKSPACE
WORKSPACE = Path(".claude/diagnostics/run-19131365567")
METADATA_FILE = WORKSPACE / "run-metadata.json"

# Get recent run history for the workflow
with open(METADATA_FILE) as f:
    metadata = json.load(f)
workflow_name = metadata.get('workflowName')
print(present_recent_runs(workflow_name, 20))

# For PR vs Merge Queue comparison
if "merge-queue" in workflow_name:
    current_sha = metadata.get('headSha')
    pr_runs = get_recent_runs("cicd_1-pr.yml", 1)
    if pr_runs and pr_runs[0].get('headSha') == current_sha:
        pr_result = pr_runs[0].get('conclusion')
        if pr_result == "success":
            print("⚠️ Test Filtering Issue: PR passed but merge queue failed")
            print("This suggests the test was filtered in the PR but ran in the merge queue")
```

### 8. Generate Comprehensive Report

**AI writes the report naturally** (not from a template):

**CRITICAL**: Generate TWO separate reports:
1. **DIAGNOSIS.md** - User-facing failure diagnosis (no skill evaluation)
2. **ANALYSIS_EVALUATION.md** - Skill effectiveness evaluation (meta-analysis)

See [REFERENCE.md](REFERENCE.md) for report templates and structure.

**IMPORTANT**:
- **DIAGNOSIS.md** = User-facing failure analysis (what failed, why, how to fix)
- **ANALYSIS_EVALUATION.md** = Internal skill evaluation (how well the skill performed)
- DO NOT mix skill effectiveness evaluation into DIAGNOSIS.md
- Users should not see skill meta-analysis in their failure reports

### 9. Collaborate with User (When Multiple Paths Exist)

**As a senior engineer, when you encounter decision points or uncertainty, engage the user:**

#### When to Ask for User Input:
1. **Multiple plausible root causes** with similar evidence weight
2. **Insufficient information** requiring deeper investigation
3. **Trade-offs between investigation paths**
4. **Recommendation requires user context**

See [REFERENCE.md](REFERENCE.md) for examples of user collaboration patterns.

### 10. Create Issue (if needed)

**After analysis, determine if issue creation is warranted:**

```python
import subprocess
import json

# Senior engineer judgment call based on:
# - Is this already tracked? (check known issues)
# - Is this a new failure? (check recent history)
# - Is this blocking development? (impact assessment)
# - Would an issue help track/fix it?
(actionability) + +if CREATE_ISSUE: + issue_body = f"""## Summary +{summary} + +## Failure Evidence +{evidence_excerpts} + +## Root Cause Analysis +{analysis_with_confidence} + +## Reproduction Pattern +{reproduction_steps} + +## Diagnostic Run +- Run ID: {RUN_ID} +- Workspace: {WORKSPACE} + +## Recommended Actions +{recommendations} +""" + + subprocess.run([ + "gh", "issue", "create", + "--title", f"[CI/CD] {brief_description}", + "--label", "bug,ci-cd,Flakey Test", + "--body", issue_body + ]) +``` + +## Key Principles + +### 1. Evidence-Driven, Not Rule-Based + +**Don't hardcode classification logic**. Present evidence and let AI reason: + +❌ **Bad** (rigid rules): +```python +if "modDate" in log_content: + return "flaky_test" +if "npm" in log_content: + check_external_always() # Wasteful +``` + +✅ **Good** (AI interprets evidence): +```python +evidence = present_complete_diagnostic(log_file) +# AI sees "modDate + boolean flip + issue #33746" → concludes "flaky test" +# AI sees "npm ERR! + EOTP + timing correlation" → checks external issues +# AI sees "AssertionError + recent PR" → focuses on code changes +``` + +### 2. Adaptive Investigation Depth + +**Let findings guide how deep you go:** + +``` +Quick Win (30 sec - 2 min) +└─ Known issue? → Link and done +└─ Clear error? → Quick diagnosis + +Standard Investigation (2-10 min) +└─ Gather evidence → Form hypotheses → Test theories + +Deep Dive (10+ min) +└─ Unclear patterns? → Compare runs, check history, analyze timing +└─ Multiple theories? → Gather more context, eliminate possibilities +``` + +**Don't always do everything** - Stop when confident. + +### 3. Context Shapes Interpretation + +**Same error, different meaning in different workflows:** + +``` +"Test timeout" in PR workflow → Might be code issue, check changes +"Test timeout" in nightly → Likely flaky test, check history +"npm ERR!" in deployment → Check external issues FIRST +"npm ERR!" in build → Check package.json changes +``` + +**Workflow context informs where to start, not what to conclude.** + +### 4. Tool Selection Based on Failure Type + +**Don't use every tool every time:** + +| Failure Type | Primary Tools | Skip | +|--------------|---------------|------| +| Deployment/Auth | external_issues.py, WebSearch | Deep log analysis | +| Test assertion | Code changes, test history | External checks | +| Flaky test | Run history, timing patterns | External checks | +| Infrastructure | Recent runs, log patterns | Code changes | + +### 5. Leverage Caching + +Workspace automatically caches: +- Run metadata +- Job details +- Downloaded logs +- Evidence extraction + +**Rerunning the skill uses cached data** (much faster!) 
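To illustrate principle 5, a minimal sketch using the cache-aware helpers listed in utils/README.md (the artifact names and note text are illustrative):

```python
import sys
from pathlib import Path
sys.path.insert(0, str(Path(".claude/skills/cicd-diagnostics/utils")))

from workspace import get_diagnostic_workspace, artifact_exists, save_artifact

# A second call for the same run returns the existing workspace with its cached artifacts
diagnostic_dir = get_diagnostic_workspace("19131365567")

if artifact_exists(diagnostic_dir, "run-metadata.json"):
    print("Cached metadata found - no GitHub API call needed")
else:
    print("No cache yet - fetch-metadata.py would populate it")

# Intermediate findings persist across reruns too
save_artifact(diagnostic_dir, "notes.txt", "First pass: suspect flaky timing in ContentTypeCommandIT")
```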
+ +## Output Format + +**Write naturally, like a senior engineer writing to a colleague.** Include relevant sections based on what you discovered: + +**Core sections (always):** +- **Executive Summary** - What failed and why (2-3 sentences) +- **Root Cause** - Your conclusion with confidence level and reasoning +- **Evidence** - Key findings that support your conclusion +- **Recommendations** - What should happen next + +**Additional sections (as relevant):** +- **Known Issues** - Internal or external issues found (if checked) +- **Timeline Analysis** - When it started failing (if relevant) +- **Test Fingerprint** - Pattern for matching (if test failure) +- **Impact Assessment** - Blocking status, frequency (if important) +- **Competing Hypotheses** - Theories you ruled out (if multiple possibilities) + +**Don't force sections that don't add value.** A deployment authentication error doesn't need a "Test Fingerprint" section. + +## Success Criteria + +**Investigation Quality:** +✅ Identified specific failure point with evidence +✅ Determined root cause with reasoning (not just labels) +✅ Assessed whether this is a known issue (when relevant) +✅ Made appropriate use of external validation (when patterns suggest it) +✅ Provided actionable recommendations + +**Process Quality:** +✅ Used adaptive investigation depth (stopped when confident) +✅ Let evidence guide technique selection (didn't use every tool blindly) +✅ Explained confidence level and competing theories +✅ Saved diagnostic artifacts in workspace +✅ Wrote natural, contextual report (not template-filled) + +## Reference Files + +For detailed information: +- [REFERENCE.md](REFERENCE.md) - Detailed technical expertise, diagnostic patterns, and examples +- [WORKFLOWS.md](WORKFLOWS.md) - Workflow descriptions and patterns +- [LOG_ANALYSIS.md](LOG_ANALYSIS.md) - Advanced log analysis techniques +- [utils/README.md](utils/README.md) - Utility function reference +- [ISSUE_TEMPLATE.md](ISSUE_TEMPLATE.md) - Issue creation template +- [README.md](README.md) - Quick reference and examples diff --git a/.claude/skills/cicd-diagnostics/WORKFLOWS.md b/.claude/skills/cicd-diagnostics/WORKFLOWS.md new file mode 100644 index 000000000000..6d00e95205ab --- /dev/null +++ b/.claude/skills/cicd-diagnostics/WORKFLOWS.md @@ -0,0 +1,347 @@ +# DotCMS CI/CD Workflows Reference + +Complete documentation of workflow behaviors and failure patterns. + +## cicd_1-pr.yml - Pull Request Validation + +**Purpose**: Fast feedback on PR changes with optimized test selection + +**Triggers**: +- Pull request opened/synchronized +- Re-run requested + +**Test Strategy**: +- **Filtered tests**: Runs subset based on changed files +- **Optimization goal**: Fast feedback (5-15 min typical) +- **Trade-off**: May miss integration issues caught in full suite + +**Common Failure Patterns**: + +1. **Code Compilation Errors** + - Pattern: `[ERROR] COMPILATION ERROR` + - Cause: Syntax errors, missing imports, type errors + - Log location: Maven build output, early in job + - Action: Fix compilation errors in PR + +2. **Unit Test Failures** + - Pattern: `Tests run:.*Failures: [1-9]` + - Cause: Breaking changes in code + - Log location: Surefire reports + - Action: Fix failing tests or revert breaking change + +3. **Lint/Format Violations** + - Pattern: `Checkstyle violations`, `PMD violations` + - Cause: Code style issues + - Log location: Static analysis step + - Action: Run `mvn spotless:apply` locally + +4. 
**Filtered Test Passes (False Positive)**
   - Pattern: PR passes, merge queue fails
   - Cause: Integration test not run in PR due to filtering
   - Detection: Compare PR vs merge queue results for the same commit
   - Action: Run the full test suite locally or wait for the merge queue

**Typical Duration**: 5-20 minutes

**Workflow URL**: https://github.com/dotCMS/core/actions/workflows/cicd_1-pr.yml

## cicd_2-merge-queue.yml - Pre-Merge Full Validation

**Purpose**: Comprehensive validation before merging to the main branch

**Triggers**:
- PR added to merge queue (manual or automated)
- Required status checks passed

**Test Strategy**:
- **Full test suite**: ALL tests run (integration, unit, E2E)
- **No filtering**: Catches issues missed in the PR workflow
- **Duration**: 30-60 minutes typical

**Common Failure Patterns**:

1. **Test Filtering Discrepancy**
   - Pattern: PR passed ✓, merge queue failed ✗
   - Cause: Test filtered in PR, failed in the full suite
   - Detection: Same commit, different outcomes
   - Action: Fix the test that was filtered out
   - Prevention: Run the full suite locally before merge

2. **Multiple PR Conflicts**
   - Pattern: PR A passes, PR B passes, merge queue with both fails
   - Cause: Conflicting changes between PRs
   - Detection: Multiple PRs in queue, all passing individually
   - Log pattern: Integration test failures, database state issues
   - Action: Rebase one PR on the other, re-test

3. **Previous PR Failure Contamination**
   - Pattern: PR fails immediately after another PR failure
   - Cause: Shared state or resources from the previous run
   - Detection: Check the previous run in the queue
   - Action: Re-run the workflow (no code changes needed)

4. **Branch Not Synchronized**
   - Pattern: Tests fail that pass on main
   - Cause: PR branch behind main, missing recent fixes
   - Detection: `gh pr view $PR --json mergeStateStatus` shows `BEHIND`
   - Action: Merge main into the PR branch, re-test

5. **Flaky Tests**
   - Pattern: Intermittent failures, passes on re-run
   - Cause: Test has race conditions, timing dependencies
   - Detection: Same test fails/passes across runs
   - Action: Investigate the test, add to flaky test tracking
   - Labels: `flaky-test`

6. **Infrastructure Timeouts**
   - Pattern: `timeout`, `connection refused`, `rate limit exceeded`
   - Cause: GitHub Actions infrastructure, external services
   - Detection: No code changes, external error messages
   - Action: Re-run the workflow, check GitHub status

**Typical Duration**: 30-90 minutes

**Critical Checks Before Merge**:
```bash
# Verify PR is up to date
gh pr view $PR_NUMBER --json mergeStateStatus

# Check for other PRs in queue
gh pr list --search "is:open base:main label:merge-queue"

# Review recent merge queue runs
gh run list --workflow=cicd_2-merge-queue.yml --limit 10
```

**Workflow URL**: https://github.com/dotCMS/core/actions/workflows/cicd_2-merge-queue.yml

## cicd_3-trunk.yml - Post-Merge Deployment

**Purpose**: Deploy merged changes, publish artifacts, build Docker images

**Triggers**:
- Successful merge to the main branch
- Uses artifacts from the merge queue (no test re-run)

**Key Operations**:
1. Retrieve build artifacts from the merge queue
2. Deploy to the staging environment
3. Build and push Docker images
4. Run CLI smoke tests
5. Update documentation sites

**Common Failure Patterns**:

1.
**Artifact Retrieval Failure** + - Pattern: `artifact not found`, `download failed` + - Cause: Merge queue artifacts expired or missing + - Detection: Early failure in artifact download step + - Action: Re-run merge queue to regenerate artifacts + +2. **Docker Build Failure** + - Pattern: `failed to build`, `COPY failed`, `image too large` + - Cause: Dockerfile changes, dependency updates, resource limits + - Log location: Docker build step + - Action: Review Dockerfile changes, check layer sizes + +3. **Docker Push Failure** + - Pattern: `denied: access forbidden`, `rate limit`, `timeout` + - Cause: Registry authentication, network, rate limits + - Detection: Build succeeds, push fails + - Action: Check registry credentials, retry after rate limit + +4. **CLI Tool Failures** + - Pattern: CLI command errors, integration failures + - Cause: API changes breaking CLI, environment config + - Log location: CLI test/validation steps + - Action: Review CLI compatibility with API changes + +5. **Deployment Configuration Issues** + - Pattern: Configuration errors, environment variable issues + - Cause: Missing secrets, config changes + - Detection: Deployment step failures + - Action: Verify environment configuration in GitHub secrets + +**Important Notes**: +- Tests are NOT re-run (assumes merge queue validation) +- Test failures here indicate artifact corruption or environment issues +- Deployment failures don't necessarily mean code issues + +**Typical Duration**: 15-30 minutes + +**Workflow URL**: https://github.com/dotCMS/core/actions/workflows/cicd_3-trunk.yml + +## cicd_4-nightly.yml - Scheduled Full Validation + +**Purpose**: Detect flaky tests, infrastructure issues, external dependency changes + +**Triggers**: +- Scheduled (nightly, e.g., 2 AM UTC) +- Manual trigger via workflow dispatch + +**Test Strategy**: +- Full test suite against main branch +- Latest dependencies (detects upstream breaking changes) +- Longer timeout thresholds +- Multiple test runs for flaky detection (optional) + +**Common Failure Patterns**: + +1. **Flaky Test Detection** + - Pattern: Test fails occasionally, not consistently + - Cause: Race conditions, timing dependencies, resource contention + - Detection: Failure rate < 100% over multiple nights + - Analysis: Track test across 20-30 nightly runs + - Action: Mark as flaky, investigate root cause + - Threshold: >5% failure rate = needs attention + +2. **External Dependency Changes** + - Pattern: Tests fail after dependency update + - Cause: Upstream library using `latest` or mutable version + - Detection: No code changes in repo, failure starts suddenly + - Log pattern: `NoSuchMethodError`, API compatibility errors + - Action: Pin dependency versions, update code for compatibility + +3. **GitHub Actions Version Changes** + - Pattern: Workflow steps fail, GitHub Actions behavior changed + - Cause: GitHub Actions runner or action version updated + - Detection: Workflow YAML unchanged, runner behavior different + - Log pattern: Action warnings, deprecation notices + - Action: Update action versions explicitly in workflow + +4. **Infrastructure Degradation** + - Pattern: Timeouts, slow tests, resource exhaustion + - Cause: GitHub Actions infrastructure issues + - Detection: Tests pass but take much longer, timeouts + - Action: Check GitHub Actions status, wait for resolution + +5. 
**Database/Elasticsearch State Issues**
   - Pattern: Tests fail with data inconsistencies
   - Cause: Cleanup issues, state leakage between tests
   - Detection: Tests pass individually, fail in suite
   - Action: Improve test isolation, add cleanup

6. **Time-Dependent Test Failures**
   - Pattern: Tests fail at specific times (timezone, daylight saving)
   - Cause: Hard-coded dates, timezone assumptions
   - Detection: Failure coincides with date/time changes
   - Action: Use relative dates, mock time in tests

**Flaky Test Analysis Process**:
```bash
# Get the last 30 nightly runs
gh run list --workflow=cicd_4-nightly.yml --limit 30 --json databaseId,conclusion,createdAt

# For a specific test, count failures
# (requires parsing test report artifacts across runs)

# Calculate the flaky percentage (see the Python sketch before the decision tree below)
# Flaky if: 5% < failure rate < 95%
# Consistently failing if: failure rate >= 95%
# Stable if: failure rate < 5%
```

**Typical Duration**: 45-90 minutes

**Workflow URL**: https://github.com/dotCMS/core/actions/workflows/cicd_4-nightly.yml

## Cross-Cutting Failure Causes

These affect all workflows:

### Reproducibility Issues

**External Dependencies with Mutable Versions**:
- Maven dependencies using version ranges or `LATEST`
- Docker base images using the `latest` tag
- GitHub Actions without pinned versions (@v2 vs @v2.1.0)
- NPM dependencies without a lock file or using `^` ranges

**Detection**:
- Failures start suddenly without code changes
- Different results across runs with the same code
- Dependency resolution messages in logs

**Prevention**:
- Pin all dependency versions explicitly
- Use lock files (package-lock.json, yarn.lock)
- Pin GitHub Actions to a commit SHA: `uses: actions/checkout@a12b3c4`
- Avoid `latest` tags for Docker images

### Infrastructure Issues

**GitHub Actions Platform**:
- Runner outages or degraded performance
- Artifact storage issues
- Registry rate limits
- Network connectivity issues

**Detection**:
```bash
# Check GitHub status
curl -s https://www.githubstatus.com/api/v2/status.json | jq '.status.description'

# Look for infrastructure patterns in logs
grep -i "timeout\|rate limit\|connection refused\|runner.*fail" logs.txt
```

**Action**: Wait for GitHub resolution, retry the workflow

**External Services**:
- Maven Central unavailable
- Docker Hub rate limits
- NPM registry issues
- Elasticsearch download failures

**Detection**:
- `Could not resolve`, `connection timeout`, `rate limit`
- Service-specific error messages

**Action**: Wait for service resolution, use mirrors/caches

### Resource Constraints

**Memory/Disk Issues**:
- Pattern: `OutOfMemoryError`, `No space left on device`
- Cause: Large test suite, memory leaks, artifact accumulation
- Action: Optimize test memory, clean up artifacts, split jobs

**Timeout Issues**:
- Pattern: Job cancelled, timeout reached
- Cause: Tests running longer than expected, hung processes
- Action: Investigate slow tests, increase timeout, optimize

## Workflow Comparison Matrix

| Aspect | PR | Merge Queue | Trunk | Nightly |
|--------|-----|-------------|--------|---------|
| **Tests** | Filtered subset | Full suite | None (reuses) | Full suite |
| **Duration** | 5-20 min | 30-90 min | 15-30 min | 45-90 min |
| **Purpose** | Fast feedback | Validation | Deployment | Stability |
| **Failure = Code Issue?** | Usually yes | Usually yes | Maybe no | Maybe no |
| **Retry Safe?** | Yes | Yes (check queue) | Yes | Yes |
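A minimal Python sketch of the flaky-percentage calculation from the nightly analysis process above, computed at the workflow level (per-test rates still require parsing test-report artifacts, as noted there):

```python
import json
import subprocess

# Same query as the analysis process above: the last 30 nightly runs
result = subprocess.run(
    ["gh", "run", "list", "--workflow=cicd_4-nightly.yml", "--limit", "30",
     "--json", "databaseId,conclusion,createdAt"],
    capture_output=True, text=True, check=True,
)
runs = json.loads(result.stdout)

# Only count runs that actually concluded one way or the other
concluded = [r for r in runs if r.get("conclusion") in ("success", "failure")]
failure_rate = sum(1 for r in concluded if r["conclusion"] == "failure") / max(len(concluded), 1)

# Thresholds from the flaky test analysis process
if failure_rate >= 0.95:
    print(f"Consistently failing ({failure_rate:.0%}) - needs a fix, not a retry")
elif failure_rate > 0.05:
    print(f"Flaky ({failure_rate:.0%}) - needs attention")
else:
    print(f"Stable ({failure_rate:.0%})")
```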
## Diagnostic Decision Tree

```
Build failed?
├─ Which workflow?
│  ├─ PR → Check compilation, unit tests, lint
│  ├─ Merge Queue → Compare with PR results
│  │  ├─ PR passed → Test filtering issue
│  │  ├─ PR failed → Same issue, expected
│  │  └─ First failure → Check queue, branch sync
│  ├─ Trunk → Check artifact retrieval, deployment
│  └─ Nightly → Likely flaky or infrastructure
│
├─ Error type?
│  ├─ Compilation → Code issue, fix in PR
│  ├─ Test failure → Check if new or flaky
│  ├─ Timeout → Infrastructure or slow test
│  └─ Dependency → External issue or reproducibility
│
└─ Historical pattern?
   ├─ First time → New issue, recent change
   ├─ Intermittent → Flaky test, track
   └─ Always fails → Consistent issue, needs fix
```
\ No newline at end of file
diff --git a/.claude/skills/cicd-diagnostics/fetch-annotations.py b/.claude/skills/cicd-diagnostics/fetch-annotations.py
new file mode 100755
index 000000000000..315fa5fdd488
--- /dev/null
+++ b/.claude/skills/cicd-diagnostics/fetch-annotations.py
@@ -0,0 +1,114 @@
#!/usr/bin/env python3
"""Fetch workflow run annotations (syntax errors, validation failures).

Usage: python fetch-annotations.py <RUN_ID> <WORKSPACE>

Annotations show GitHub Actions workflow syntax validation errors that are
visible in the UI but not in job logs. These explain why jobs were skipped
or never evaluated due to workflow file syntax errors.

IMPORTANT: GitHub's REST API does NOT expose workflow syntax validation errors.
These errors are only visible in the GitHub UI, so this script scrapes the HTML
directly to extract them.

Example: python fetch-annotations.py 19131365567 /path/to/.claude/diagnostics/run-19131365567
"""

import sys
import json
from pathlib import Path

# Add utils to path
script_dir = Path(__file__).parent
sys.path.insert(0, str(script_dir / "utils"))

from html_scraper import scrape_workflow_annotations, save_scraped_annotations, format_scraped_annotations_report


def main():
    if len(sys.argv) < 3:
        print("ERROR: RUN_ID and WORKSPACE parameters required", file=sys.stderr)
        print("Usage: python fetch-annotations.py <RUN_ID> <WORKSPACE>", file=sys.stderr)
        print("", file=sys.stderr)
        print("Example:", file=sys.stderr)
        print("  python fetch-annotations.py 19131365567 /path/to/.claude/diagnostics/run-19131365567", file=sys.stderr)
        sys.exit(1)

    run_id = sys.argv[1]
    workspace = Path(sys.argv[2])

    # Validate workspace
    if not workspace.exists() or not workspace.is_dir():
        print(f"ERROR: Invalid workspace path: {workspace}", file=sys.stderr)
        print("Workspace must be a valid directory", file=sys.stderr)
        sys.exit(1)

    scraped_file = workspace / "workflow-annotations-scraped.json"

    # Note: We skip the GitHub API because it does NOT return workflow syntax validation errors.
    # The API only returns job-level annotations (things that happened during job execution),
    # but workflow syntax errors prevent jobs from being created in the first place.
    # These errors are only visible in the GitHub UI, so we scrape the HTML directly.
    # Scrape workflow-level annotations from HTML (primary source)
    print("=" * 80)
    print("Fetching workflow annotations from GitHub UI (HTML)")
    print("=" * 80)
    print("ℹ️  Note: GitHub API does NOT expose workflow syntax validation errors")
    print("   We scrape the HTML directly to find these critical errors")
    print()

    # Check cache
    if scraped_file.exists():
        print(f"✅ Using cached annotations from {scraped_file}")
        scraped_data = json.loads(scraped_file.read_text(encoding='utf-8'))
    else:
        print(f"🌐 Scraping workflow annotations for run {run_id}...")
        print("⚠️  WARNING: HTML scraping is fragile and may break if GitHub changes their UI")
        print()
        scraped_data = scrape_workflow_annotations(run_id)
        save_scraped_annotations(run_id, workspace, scraped_data)

    # Display annotations
    workflow_annotations = scraped_data.get('workflow_annotations', [])

    if workflow_annotations:
        print(f"\n📊 Found {len(workflow_annotations)} annotation(s):")
        print(format_scraped_annotations_report(scraped_data))
    else:
        if scraped_data.get('error'):
            print(f"\n❌ Error during HTML scraping: {scraped_data['error']}")
        else:
            print("\n✅ No workflow syntax errors found")

    # Summary
    print("\n" + "=" * 80)
    print("SUMMARY")
    print("=" * 80)

    total_annotations = len(workflow_annotations)

    print(f"\nTotal annotations found: {total_annotations}")

    # Group by severity
    if total_annotations > 0:
        by_level = {}
        for ann in workflow_annotations:
            level = ann.get('level', 'unknown')
            by_level[level] = by_level.get(level, 0) + 1

        for level in ['failure', 'warning', 'notice']:
            if level in by_level:
                print(f"  • {level.capitalize()}: {by_level[level]}")

    if total_annotations == 0:
        print("\n✅ No annotations found - workflow syntax is valid!")
    else:
        print("\n💡 Annotations explain why jobs may have been skipped or never evaluated.")
        print("   Workflow syntax errors prevent jobs from being created in the first place.")

    print(f"\nAnnotation data saved to: {scraped_file}")
    print("=" * 80)


if __name__ == "__main__":
    main()
\ No newline at end of file
diff --git a/.claude/skills/cicd-diagnostics/fetch-jobs.py b/.claude/skills/cicd-diagnostics/fetch-jobs.py
new file mode 100755
index 000000000000..d5aecfe65c54
--- /dev/null
+++ b/.claude/skills/cicd-diagnostics/fetch-jobs.py
@@ -0,0 +1,51 @@
#!/usr/bin/env python3
"""Fetch job details with caching."""

import sys
import json
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent / "utils"))
from github_api import get_jobs_detailed


def main():
    if len(sys.argv) < 3:
        print("Usage: python fetch-jobs.py <RUN_ID> <WORKSPACE>", file=sys.stderr)
        sys.exit(1)

    run_id = sys.argv[1]
    workspace = Path(sys.argv[2])

    # A Path object is always truthy, so validate the directory instead
    if not workspace.exists() or not workspace.is_dir():
        print(f"ERROR: Invalid workspace path: {workspace}", file=sys.stderr)
        sys.exit(1)

    jobs_file = workspace / "jobs-detailed.json"

    # Fetch jobs if not cached
    if not jobs_file.exists():
        print("Fetching job details...")
        get_jobs_detailed(run_id, jobs_file)
        print(f"✓ Job details saved to {jobs_file}")
    else:
        print(f"✓ Using cached jobs: {jobs_file}")

    # Display failed jobs
    print("")
    print("=== Failed Jobs ===")
    jobs_data = json.loads(jobs_file.read_text(encoding='utf-8'))
    jobs = jobs_data.get('jobs', [])

    for job in jobs:
        if job.get('conclusion') == 'failure':
            print(f"Name: {job.get('name')}")
            print(f"ID: {job.get('id')}")
            print(f"Conclusion: {job.get('conclusion')}")
            print("")


if __name__ == "__main__":
    main()


diff --git a/.claude/skills/cicd-diagnostics/fetch-logs.py b/.claude/skills/cicd-diagnostics/fetch-logs.py
new file mode 100755
index 000000000000..311dc32fd2ed
--- /dev/null
+++ b/.claude/skills/cicd-diagnostics/fetch-logs.py
@@ -0,0 +1,103 @@
#!/usr/bin/env python3
"""Fetch failed job logs with caching."""

import sys
import json
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent / "utils"))
from github_api import download_job_logs, get_failed_jobs


def format_size(size_bytes: int) -> str:
    """Format size in human-readable format."""
    for unit in ['B', 'KB', 'MB', 'GB']:
        if size_bytes < 1024.0:
            return f"{size_bytes:.1f}{unit}"
        size_bytes /= 1024.0
    return f"{size_bytes:.1f}TB"


def main():
    if len(sys.argv) < 3:
        print("Usage: python fetch-logs.py <RUN_ID> <WORKSPACE> [JOB_ID]", file=sys.stderr)
        print("", file=sys.stderr)
        print("Example:", file=sys.stderr)
        print("  python fetch-logs.py 19219835536 /path/to/workspace", file=sys.stderr)
        print("  python fetch-logs.py 19219835536 /path/to/workspace 54939324205", file=sys.stderr)
        sys.exit(1)

    run_id = sys.argv[1]
    workspace_path = sys.argv[2]

    # Optional job ID parameter
    specific_job_id = sys.argv[3] if len(sys.argv) > 3 else None

    # Validate parameters are not swapped (workspace should be a path, not just digits)
    # A workspace path will contain slashes or be a relative path like "workspace"
    # A job ID will be only digits
    if workspace_path.isdigit() and len(workspace_path) > 10:
        print(f"ERROR: WORKSPACE parameter appears to be a job ID: {workspace_path}", file=sys.stderr)
        print("", file=sys.stderr)
        print("Correct usage: python fetch-logs.py <RUN_ID> <WORKSPACE> [JOB_ID]", file=sys.stderr)
        print(f"  RUN_ID: {run_id}", file=sys.stderr)
        print("  WORKSPACE: should be a directory path (e.g., /path/to/workspace)", file=sys.stderr)
        print(f"  JOB_ID (optional): {workspace_path} <- you may have meant this as the job ID", file=sys.stderr)
        sys.exit(1)

    workspace = Path(workspace_path)

    if not workspace.exists():
        print(f"ERROR: Workspace directory does not exist: {workspace}", file=sys.stderr)
        print("", file=sys.stderr)
        print("Make sure the workspace path is correct. You passed:", file=sys.stderr)
        print(f"  RUN_ID: {run_id}", file=sys.stderr)
        print(f"  WORKSPACE: {workspace_path}", file=sys.stderr)
        if specific_job_id:
            print(f"  JOB_ID: {specific_job_id}", file=sys.stderr)
        sys.exit(1)

    jobs_file = workspace / "jobs-detailed.json"
    if not jobs_file.exists():
        print(f"ERROR: Jobs file not found: {jobs_file}", file=sys.stderr)
        print("Run fetch-jobs.py first to get job details.", file=sys.stderr)
        sys.exit(1)

    # Get failed jobs
    failed_jobs = get_failed_jobs(jobs_file)

    if not failed_jobs:
        print("No failed jobs found.")
        return

    # If a specific job ID was provided, filter to that job
    if specific_job_id:
        failed_jobs = [job for job in failed_jobs if str(job['id']) == specific_job_id]
        if not failed_jobs:
            print(f"ERROR: Job {specific_job_id} not found or not failed", file=sys.stderr)
            sys.exit(1)

    # Download logs for each failed job
    for job in failed_jobs:
        job_id = str(job['id'])
        job_name = job.get('name', 'Unknown')
        log_file = workspace / f"failed-job-{job_id}.txt"

        # Download logs if not cached or empty
        if not log_file.exists() or log_file.stat().st_size == 0:
            print(f"Downloading logs for job {job_id} ({job_name})...")
            try:
                download_job_logs(job_id, log_file)
                size = log_file.stat().st_size
                print(f"✓ Downloaded: {format_size(size)} -> {log_file}")
            except Exception as e:
                print(f"✗ Failed to download logs for job {job_id}: {e}", file=sys.stderr)
        else:
            size = log_file.stat().st_size
            print(f"✓ Using cached logs: {format_size(size)} -> {log_file}")


if __name__ == "__main__":
    main()


diff --git a/.claude/skills/cicd-diagnostics/fetch-metadata.py b/.claude/skills/cicd-diagnostics/fetch-metadata.py
new file mode 100755
index 000000000000..e49d890322c7
--- /dev/null
+++ b/.claude/skills/cicd-diagnostics/fetch-metadata.py
@@ -0,0 +1,42 @@
#!/usr/bin/env python3
"""Fetch workflow metadata with caching."""

import sys
import json
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent / "utils"))
from github_api import get_run_metadata


def main():
    if len(sys.argv) < 3:
        print("Usage: python fetch-metadata.py <RUN_ID> <WORKSPACE>", file=sys.stderr)
        sys.exit(1)

    run_id = sys.argv[1]
    workspace = Path(sys.argv[2])

    # A Path object is always truthy, so validate the directory instead
    if not workspace.exists() or not workspace.is_dir():
        print(f"ERROR: Invalid workspace path: {workspace}", file=sys.stderr)
        sys.exit(1)

    metadata_file = workspace / "run-metadata.json"

    # Fetch metadata if not cached
    if not metadata_file.exists():
        print("Fetching run metadata...")
        get_run_metadata(run_id, metadata_file)
        print(f"✓ Metadata saved to {metadata_file}")
    else:
        print(f"✓ Using cached metadata: {metadata_file}")

    # Display metadata
    metadata = json.loads(metadata_file.read_text(encoding='utf-8'))
    print(json.dumps(metadata, indent=2))


if __name__ == "__main__":
    main()


diff --git a/.claude/skills/cicd-diagnostics/init-diagnostic.py b/.claude/skills/cicd-diagnostics/init-diagnostic.py
new file mode 100755
index 000000000000..7ca67e03483a
--- /dev/null
+++ b/.claude/skills/cicd-diagnostics/init-diagnostic.py
@@ -0,0 +1,42 @@
#!/usr/bin/env python3
"""Initialize diagnostic environment.
Usage: python init-diagnostic.py <RUN_ID>
Prints the workspace path plus shell export lines for RUN_ID and WORKSPACE
"""

import sys
from pathlib import Path

# Add utils to path
script_dir = Path(__file__).parent
sys.path.insert(0, str(script_dir / "utils"))

from workspace import get_diagnostic_workspace


def main():
    if len(sys.argv) < 2:
        print("ERROR: Run ID required", file=sys.stderr)
        print("Usage: python init-diagnostic.py <RUN_ID>", file=sys.stderr)
        sys.exit(1)

    run_id = sys.argv[1]

    # Create workspace
    workspace = get_diagnostic_workspace(run_id)

    print("✅ Diagnostic environment initialized")
    print(f"   RUN_ID: {run_id}")
    print(f"   WORKSPACE: {workspace}")

    # Export for shell usage
    print(f"\nexport RUN_ID={run_id}")
    print(f"export WORKSPACE={workspace}")


if __name__ == "__main__":
    main()


diff --git a/.claude/skills/cicd-diagnostics/requirements.txt b/.claude/skills/cicd-diagnostics/requirements.txt
new file mode 100644
index 000000000000..7e58241804ba
--- /dev/null
+++ b/.claude/skills/cicd-diagnostics/requirements.txt
@@ -0,0 +1,17 @@
# Python dependencies for the cicd-diagnostics skill
# No external dependencies required - uses the standard library and the GitHub CLI

# Note: This skill uses the GitHub CLI (gh), which must be installed separately
# The skill uses Python 3.8+ standard library modules:
# - subprocess (for GitHub CLI calls)
# - json (for JSON parsing)
# - re (for regex)
# - pathlib (for file operations)
# - os, sys (standard system modules)

# If you need to install the GitHub CLI:
# macOS: brew install gh
# Linux: See https://github.com/cli/cli/blob/trunk/docs/install_linux.md
# Windows: See https://github.com/cli/cli/blob/trunk/docs/install_windows.md


diff --git a/.claude/skills/cicd-diagnostics/utils/README.md b/.claude/skills/cicd-diagnostics/utils/README.md
new file mode 100644
index 000000000000..182bd7ca6e81
--- /dev/null
+++ b/.claude/skills/cicd-diagnostics/utils/README.md
@@ -0,0 +1,293 @@
# CI/CD Diagnostics Utility Functions

Reusable Python utility modules for CI/CD failure analysis.

## Overview

This directory contains modular Python utilities extracted from the cicd-diagnostics skill; they can be imported by the skill or by other automation scripts.

## Files

### github_api.py
GitHub API and CLI wrapper functions for fetching workflow, job, and issue data.
+ +**Key Functions:** +- `extract_run_id(url)` - Extract run ID from GitHub Actions URL +- `extract_pr_number(input)` - Extract PR number from URL or branch name +- `get_run_metadata(run_id, output_file)` - Fetch workflow run details +- `get_jobs_detailed(run_id, output_file)` - Get all jobs with step information +- `get_failed_jobs(jobs_file)` - Filter failed jobs from jobs file +- `download_job_logs(job_id, output_file)` - Download job logs +- `get_pr_info(pr_num, output_file)` - Get PR details and status checks +- `find_failed_run_from_pr(pr_info_file)` - Find failed run from PR data +- `get_recent_runs(workflow_name, limit, output_file)` - Fetch workflow history +- `search_issues(query, output_file)` - Search GitHub issues +- `compare_commits(base_sha, head_sha, output_file)` - Compare commit ranges + +**Usage Example:** +```python +import sys +from pathlib import Path +sys.path.insert(0, str(Path(".claude/skills/cicd-diagnostics/utils"))) + +from github_api import extract_run_id, get_run_metadata + +run_id = extract_run_id("https://github.com/dotCMS/core/actions/runs/19118302390") +get_run_metadata(run_id, Path("run-metadata.json")) +``` + +### workspace.py +Diagnostic workspace management with caching and artifact organization. + +**Key Functions:** +- `create_diagnostic_workspace(run_id)` - Create workspace directory +- `find_existing_diagnostic(run_id)` - Check for cached diagnostics +- `get_diagnostic_workspace(run_id, force_clean=False)` - Get or create workspace (with caching) +- `save_artifact(diagnostic_dir, filename, content)` - Save artifact to workspace +- `artifact_exists(diagnostic_dir, filename)` - Check if artifact is cached +- `get_or_fetch_artifact(diagnostic_dir, filename, fetch_command)` - Cache-aware fetching +- `ensure_gitignore_diagnostics()` - Add diagnostic dirs to .gitignore +- `list_diagnostic_workspaces()` - List all diagnostic sessions +- `clean_old_diagnostics(max_age_hours=168, max_count=50)` - Cleanup old workspaces +- `get_workspace_summary(diagnostic_dir)` - Display workspace details + +**Usage Example:** +```python +import sys +from pathlib import Path +sys.path.insert(0, str(Path(".claude/skills/cicd-diagnostics/utils"))) + +from workspace import get_diagnostic_workspace, save_artifact + +diagnostic_dir = get_diagnostic_workspace("19118302390") +save_artifact(diagnostic_dir, "notes.txt", "Analysis in progress...") +``` + +### evidence.py +Evidence presentation for AI analysis - simple data extraction without classification logic. + +**Key Functions:** +- `present_failure_evidence(log_file)` - Present all failure evidence (supports JUnit, E2E, **Postman**) +- `get_first_error_context(log_file, before=30, after=20)` - Get context around first error +- `get_failure_timeline(log_file)` - Get timeline of all failures +- `present_known_issues(test_name, error_keywords="")` - Search and present known issues +- `present_recent_runs(workflow, limit=10)` - Get recent workflow run history +- `extract_test_name(log_file)` - Extract test name from log file (JUnit/E2E/Postman) +- `extract_error_keywords(log_file)` - Extract error keywords for pattern matching +- `present_complete_diagnostic(log_file)` - Present complete diagnostic package +- `extract_error_sections_only(log_file, output_file)` - Extract only error sections for large files +- `get_log_stats(log_file)` - Get log file statistics + +**Postman Test Support (NEW in v2.1)**: +- Detects `[INFO] \d+\. AssertionError` patterns +- Extracts "expected [...] 
to deeply equal [...]" assertions +- Identifies failing collections and test names +- Provides context around Postman failures + +**Usage Example:** +```python +import sys +from pathlib import Path +sys.path.insert(0, str(Path(".claude/skills/cicd-diagnostics/utils"))) + +from evidence import present_complete_diagnostic, get_log_stats + +log_file = Path("job-logs.txt") +print(get_log_stats(log_file)) +evidence = present_complete_diagnostic(log_file) +print(evidence) +``` + +### tiered_extraction.py +Tiered evidence extraction - creates multiple levels of detail for progressive analysis. + +**Key Functions:** +- `extract_level1_summary(log_file, output_file)` - Level 1: Test Summary (~500 tokens) +- `extract_level2_unique_failures(log_file, output_file)` - Level 2: Unique Failures (~5000 tokens) +- `extract_level3_full_context(log_file, output_file)` - Level 3: Full Context (~15000 tokens) +- `extract_failed_test_names(log_file)` - Extract failed test names (JUnit/E2E/Postman) +- `auto_extract_tiered(log_file, workspace)` - Auto-tiered extraction based on log size +- `analyze_retry_patterns(log_file)` - Analyze retry patterns (deterministic vs flaky) +- `extract_postman_failures(log_file, output_file)` - **NEW**: Postman-specific extraction + +**Postman Extraction (NEW in v2.1)**: +- Parses Newman/Postman test output format +- Extracts test summary table (executed/failed counts) +- Identifies failed collections +- Provides detailed failure context with line numbers +- Lists all failed test names from "inside" patterns + +**Usage Example:** +```python +import sys +from pathlib import Path +sys.path.insert(0, str(Path(".claude/skills/cicd-diagnostics/utils"))) + +from tiered_extraction import auto_extract_tiered, analyze_retry_patterns + +log_file = Path("job-logs.txt") +workspace = Path(".claude/diagnostics/run-12345") + +auto_extract_tiered(log_file, workspace) +print(analyze_retry_patterns(log_file)) +``` + +## Integration with cicd-diagnostics Skill + +The main SKILL.md references these utilities throughout the diagnostic workflow: + +```python +import sys +from pathlib import Path +sys.path.insert(0, str(Path(".claude/skills/cicd-diagnostics/utils"))) + +from workspace import get_diagnostic_workspace +from github_api import get_run_metadata +from evidence import present_complete_diagnostic + +# Initialize workspace +diagnostic_dir = get_diagnostic_workspace("19118302390") + +# Fetch metadata +get_run_metadata("19118302390", diagnostic_dir / "run-metadata.json") + +# Analyze logs +log_file = diagnostic_dir / "failed-job-12345.txt" +evidence = present_complete_diagnostic(log_file) +``` + +## Benefits of Modular Design + +1. **Reusability** - Modules can be used by other skills or scripts +2. **Testability** - Each utility can be tested independently +3. **Maintainability** - Changes isolated to specific utility files +4. **Clarity** - Main skill logic is cleaner and more readable +5. **Composability** - Functions can be combined in different workflows +6. 
**Cross-platform** - Python works on macOS, Linux, and Windows

## Platform Compatibility

All utilities use the Python standard library (Python 3.8+):
- `pathlib` for cross-platform file paths
- `subprocess` for GitHub CLI calls
- `json` for JSON parsing
- `re` for regex operations
- No external Python dependencies required

## Error Handling

All utilities use Python exception handling:
- Functions raise exceptions on errors
- Type hints for better IDE support
- Clear error messages for debugging

## Dependencies

- Python 3.8 or higher
- GitHub CLI (gh) - must be installed separately
- Standard library only - no external Python packages required

## Script Organization & Best Practices

### Directory Structure
```
cicd-diagnostics/
├── init-diagnostic.py      # ✅ Entry Point: CLI script
├── fetch-metadata.py       # ✅ Entry Point: CLI script
├── fetch-jobs.py           # ✅ Entry Point: CLI script
├── fetch-logs.py           # ✅ Entry Point: CLI script
│
└── utils/                  # ✅ Library: Reusable utilities
    ├── __init__.py
    ├── github_api.py          # GitHub API wrappers
    ├── evidence.py            # Evidence extraction
    ├── tiered_extraction.py   # Multi-level analysis
    └── workspace.py           # Workspace management
```

### Design Principles

**✅ Root Level = Entry Points (User-Facing)**
- Accept command-line arguments
- Show usage messages
- Orchestrate workflows
- Import from utils/
- Exit with status codes

**✅ utils/ = Library (Developer-Facing)**
- Pure functions
- No CLI argument parsing
- Raise exceptions (don't exit)
- Type hints and docstrings
- Fully testable

### Example Comparison

**❌ BAD: Mixing Concerns**
```python
# utils/github_api.py (WRONG - has CLI parsing)
def download_logs():
    if len(sys.argv) < 2:
        print("Usage: ...")   # ❌ CLI logic in library
        sys.exit(1)           # ❌ Exit from library
    job_id = sys.argv[1]      # ❌ Argument parsing in library
    ...
```

**✅ GOOD: Separation of Concerns**
```python
# utils/github_api.py (CORRECT - pure function)
def download_job_logs(job_id: str, output_file: Path) -> None:
    """Download logs for a specific job.

    Args:
        job_id: GitHub Actions job ID
        output_file: Path to save logs

    Raises:
        subprocess.CalledProcessError: If gh CLI fails
    """
    result = subprocess.run([...], check=True)
    output_file.write_text(result.stdout)

# fetch-logs.py (CORRECT - CLI orchestration)
def main():
    if len(sys.argv) < 3:
        print("Usage: python fetch-logs.py <JOB_ID> <OUTPUT_FILE>")
        sys.exit(1)

    from utils.github_api import download_job_logs
    download_job_logs(sys.argv[1], Path(sys.argv[2]))

if __name__ == "__main__":
    main()
```

### Why This Structure?
+ +| Aspect | Entry Points (Root) | Utilities (utils/) | +|--------|--------------------|--------------------| +| **Purpose** | User interface | Reusable logic | +| **Testability** | Hard (needs CLI mocking) | Easy (pure functions) | +| **Reusability** | Low (specific to one workflow) | High (used by multiple scripts) | +| **Complexity** | Simple orchestration | Complex business logic | +| **Error Handling** | Print & exit | Raise exceptions | +| **Documentation** | Usage messages | Docstrings + type hints | + +### Version History + +**v2.1.0** (Current) +- ✅ Enhanced Postman/Newman test detection +- ✅ Added `extract_postman_failures()` to tiered_extraction.py +- ✅ Fixed `fetch-logs.py` argument parsing (now supports optional job ID) +- ✅ Improved assertion detection for API tests in evidence.py + +**v2.0.0** +- ✅ Converted from Bash to Python +- ✅ Separated entry points from utilities +- ✅ Added tiered extraction for large logs +- ✅ Enhanced known issue searching + +**v1.0.0** (Legacy Bash) +- Basic log extraction +- Limited test framework support diff --git a/.claude/skills/cicd-diagnostics/utils/__init__.py b/.claude/skills/cicd-diagnostics/utils/__init__.py new file mode 100755 index 000000000000..50fbaee4dcb2 --- /dev/null +++ b/.claude/skills/cicd-diagnostics/utils/__init__.py @@ -0,0 +1,5 @@ +"""CI/CD Diagnostics Utilities - Python modules for GitHub Actions failure analysis.""" + +__version__ = "2.1.0" + + diff --git a/.claude/skills/cicd-diagnostics/utils/evidence.py b/.claude/skills/cicd-diagnostics/utils/evidence.py new file mode 100755 index 000000000000..d5f1417c660b --- /dev/null +++ b/.claude/skills/cicd-diagnostics/utils/evidence.py @@ -0,0 +1,882 @@ +#!/usr/bin/env python3 +"""Evidence Presentation for AI Analysis. + +Simple data extraction without classification logic. +""" + +import json +import re +import subprocess +from pathlib import Path +from typing import Optional + + +def present_failure_evidence(log_file: Path) -> str: + """Present all failure evidence for AI analysis. + + Args: + log_file: Path to log file + + Returns: + Formatted evidence string + """ + log_content = log_file.read_text(encoding='utf-8', errors='ignore') + lines = log_content.split('\n') + + output = [] + output.append("=" * 80) + output.append("FAILURE EVIDENCE FOR ANALYSIS") + output.append("=" * 80) + output.append("") + + # Test Failures + output.append("=== FAILED TESTS ===") + output.append("") + failed_tests = [ + line for line in lines + if "<<< FAILURE!" 
in line or "::error file=" in line + ][:10] + + # Add Postman failures + postman_failures = [] + for i, line in enumerate(lines): + if re.search(r'\[INFO\]\s+\d+\.\s+(AssertionError|AssertionFailure)', line): + # Get context around the failure + start = max(0, i - 2) + end = min(len(lines), i + 5) + postman_failures.extend(lines[start:end]) + postman_failures.append("") # Add separator + if len(postman_failures) >= 50: + break + + if failed_tests or postman_failures: + if failed_tests: + output.append("JUnit/E2E Failures:") + output.extend(failed_tests) + output.append("") + if postman_failures: + output.append("Postman/API Test Failures:") + output.extend(postman_failures[:50]) + else: + output.append("No test failures found") + + output.append("") + output.append("=== ERROR MESSAGES ===") + output.append("") + errors = [] + + # Enhanced error detection for NPM, Docker, and GitHub Actions errors + # Prioritize critical deployment/build errors + critical_keywords = [ + "npm ERR!", "::error::", "##[error]", + "FAILURE:", "Failed to", "Cannot", "Unable to", + "Error:", "ERROR:" + ] + + test_error_keywords = [ + "[ERROR]", "AssertionError", "Exception" + ] + + # First pass: capture critical deployment/infrastructure errors + # Scan entire log for critical errors (don't stop early) + critical_errors = [] + for i, line in enumerate(lines): + # Skip false positives: file listings from tar/zip archives + # These are lines that ONLY list filenames without actual error context + # Pattern: timestamp + path + filename.class (no error keywords) + is_file_listing = ( + ('.class' in line or '.jar' in line) and + ('maven/dotserver' in line or 'webapps/ROOT' in line) and + not any(err_word in line for err_word in ['ERROR:', 'FAILURE:', 'Failed', 'Exception:']) + ) + + if is_file_listing: + continue + + if any(keyword in line for keyword in critical_keywords): + start = max(0, i - 5) + end = min(len(lines), i + 10) # More context for deployment errors + critical_errors.append((i, lines[start:end])) + + # Prioritize later errors (usually final failures) and unique error types + if critical_errors: + # Take last 5 error groups (most recent/final errors) + for _, error_lines in critical_errors[-10:]: + errors.extend(error_lines) + errors.append("") # Separator + + # Second pass: if no critical errors found, look for test errors + if not errors: + for i, line in enumerate(lines): + # Same file listing filter as first pass + is_file_listing = ( + ('.class' in line or '.jar' in line) and + ('maven/dotserver' in line or 'webapps/ROOT' in line) and + not any(err_word in line for err_word in ['ERROR:', 'FAILURE:', 'Failed', 'Exception:']) + ) + + if is_file_listing: + continue + + if any(keyword in line for keyword in test_error_keywords): + start = max(0, i - 3) + end = min(len(lines), i + 6) + errors.extend(lines[start:end]) + if len(errors) >= 100: + break + + if errors: + output.extend(errors[:150]) # Allow more errors to be shown + else: + output.append("No explicit errors found") + + output.append("") + output.append("=== ASSERTION DETAILS ===") + output.append("") + assertions = [ + line for line in lines + if "expected:" in line and "but was:" in line or "AssertionFailedError" in line + ][:10] + + # Add Postman assertion details + postman_assertions = [] + for i, line in enumerate(lines): + if re.search(r'(expected.*to deeply equal|expected.*to be|expected.*but was)', line, re.IGNORECASE): + postman_assertions.append(line) + if len(postman_assertions) >= 10: + break + + if assertions or postman_assertions: 
+ if assertions: + output.append("JUnit Assertions:") + output.extend(assertions) + output.append("") + if postman_assertions: + output.append("Postman Assertions:") + output.extend(postman_assertions) + else: + output.append("No assertion failures found") + + output.append("") + output.append("=== STACK TRACES ===") + output.append("") + stack_pattern = re.compile(r'at [a-zA-Z0-9.]+\([A-Za-z0-9]+\.java:\d+\)') + stacks = [line for line in lines if stack_pattern.search(line)][:30] + if stacks: + output.extend(stacks) + else: + output.append("No Java stack traces found") + + output.append("") + output.append("=== TIMING INDICATORS ===") + output.append("") + timing_keywords = ["timeout", "timed out", "Thread.sleep", "Awaitility", "race condition", "concurrent"] + timing = [ + line for line in lines + if any(keyword.lower() in line.lower() for keyword in timing_keywords) + ][:10] + if timing: + output.extend(timing) + else: + output.append("No obvious timing indicators") + + output.append("") + output.append("=== INFRASTRUCTURE INDICATORS ===") + output.append("") + infra_keywords = ["connection refused", "docker", "container", "failed", "elasticsearch", "exception", "database", "error"] + infra = [ + line for line in lines + if any(keyword.lower() in line.lower() for keyword in infra_keywords) + ][:10] + if infra: + output.extend(infra) + else: + output.append("No obvious infrastructure issues") + + output.append("") + output.append("=" * 80) + + return "\n".join(output) + + +def get_first_error_context(log_file: Path, before: int = 30, after: int = 20) -> str: + """Get context around first error (for cascade detection). + + Args: + log_file: Path to log file + before: Number of lines before error + after: Number of lines after error + + Returns: + Context string + """ + log_content = log_file.read_text(encoding='utf-8', errors='ignore') + lines = log_content.split('\n') + + first_error_line = None + for i, line in enumerate(lines, 1): + if any(keyword in line for keyword in ["[ERROR]", "FAILURE!", "::error"]): + first_error_line = i + break + + if first_error_line is None: + return "No errors found in log" + + start = max(0, first_error_line - before - 1) + end = min(len(lines), first_error_line + after) + + output = [f"=== FIRST ERROR AT LINE {first_error_line} ===", ""] + for i, line in enumerate(lines[start:end], start=start + 1): + output.append(f"{i:6d}: {line}") + + return "\n".join(output) + + +def get_failure_timeline(log_file: Path) -> str: + """Get timeline of all failures (for cascade analysis). + + Args: + log_file: Path to log file + + Returns: + Timeline string + """ + log_content = log_file.read_text(encoding='utf-8', errors='ignore') + lines = log_content.split('\n') + + output = ["=== FAILURE TIMELINE ===", ""] + + failures = [] + for i, line in enumerate(lines, 1): + if any(keyword in line for keyword in ["[ERROR]", "FAILURE!", "::error"]): + content = line[:100] if len(line) > 100 else line + failures.append((i, content)) + if len(failures) >= 20: + break + + for line_num, content in failures: + output.append(f"Line {line_num}: {content}") + + return "\n".join(output) + + +def present_known_issues(test_name: str, error_keywords: str = "") -> str: + """Present known issues for comparison (ENHANCED). 
+ + Args: + test_name: Name of the test + error_keywords: Optional error keywords for pattern matching + + Returns: + Formatted issues string + """ + output = [] + output.append("=== KNOWN ISSUES SEARCH ===") + output.append("") + output.append(f"Searching for: {test_name}") + if error_keywords: + output.append(f"Error keywords: {error_keywords}") + output.append("") + + # Strategy 1: Exact test name match + output.append("Strategy 1: Exact test name match") + try: + result = subprocess.run( + [ + "gh", "issue", "list", + "--search", f'"{test_name}" in:body', + "--state", "all", + "--label", "Flakey Test", + "--json", "number,title,state", + "--limit", "5" + ], + capture_output=True, + text=True, + check=True + ) + exact_match = json.loads(result.stdout) if result.stdout else [] + except (subprocess.CalledProcessError, json.JSONDecodeError): + exact_match = [] + + if exact_match: + output.append(" EXACT MATCHES:") + for issue in exact_match: + output.append(f" - Issue #{issue['number']}: {issue['title']} [{issue['state']}]") + else: + output.append(" No exact matches") + output.append("") + + # Strategy 2: Test class name match + output.append("Strategy 2: Test class name match") + test_class = test_name.split('.')[0] if '.' in test_name else test_name + try: + result = subprocess.run( + [ + "gh", "issue", "list", + "--search", f'"{test_class}" in:body', + "--state", "all", + "--label", "Flakey Test", + "--json", "number,title,state", + "--limit", "10" + ], + capture_output=True, + text=True, + check=True + ) + class_match = json.loads(result.stdout) if result.stdout else [] + except (subprocess.CalledProcessError, json.JSONDecodeError): + class_match = [] + + # Deduplicate with exact matches + exact_numbers = {issue['number'] for issue in exact_match} + new_class_matches = [issue for issue in class_match if issue['number'] not in exact_numbers] + + if new_class_matches: + output.append(" CLASS NAME MATCHES:") + for issue in new_class_matches: + output.append(f" - Issue #{issue['number']}: {issue['title']} [{issue['state']}]") + else: + output.append(" No additional class matches") + output.append("") + + # Strategy 3: Error pattern/keyword match + if error_keywords: + output.append(f"Strategy 3: Error pattern match ({error_keywords})") + try: + result = subprocess.run( + [ + "gh", "issue", "list", + "--search", f"{error_keywords} in:body", + "--state", "all", + "--label", "Flakey Test", + "--json", "number,title,state,body", + "--limit", "15" + ], + capture_output=True, + text=True, + check=True + ) + pattern_match = json.loads(result.stdout) if result.stdout else [] + except (subprocess.CalledProcessError, json.JSONDecodeError): + pattern_match = [] + + # Deduplicate + all_numbers = exact_numbers | {issue['number'] for issue in new_class_matches} + new_pattern_matches = [issue for issue in pattern_match if issue['number'] not in all_numbers] + + if new_pattern_matches: + output.append(" PATTERN MATCHES:") + for issue in new_pattern_matches: + output.append(f" - Issue #{issue['number']}: {issue['title']} [{issue['state']}]") + output.append("") + output.append(" Pattern match details (showing first 200 chars from body):") + for issue in new_pattern_matches: + body_preview = issue.get('body', '')[:200].replace('\n', ' ') + output.append(f" #{issue['number']}: {body_preview}...") + else: + output.append(" No additional pattern matches") + output.append("") + + # Strategy 4: CLI test issues + if "cli" in test_name.lower() or "command" in test_name.lower(): + output.append("Strategy 4: 
CLI-related flaky tests") + try: + result = subprocess.run( + [ + "gh", "issue", "list", + "--search", "cli in:body", + "--state", "all", + "--label", "Flakey Test", + "--json", "number,title,state", + "--limit", "10" + ], + capture_output=True, + text=True, + check=True + ) + cli_match = json.loads(result.stdout) if result.stdout else [] + except (subprocess.CalledProcessError, json.JSONDecodeError): + cli_match = [] + + if cli_match: + output.append(" CLI-RELATED:") + for issue in cli_match: + output.append(f" - Issue #{issue['number']}: {issue['title']} [{issue['state']}]") + else: + output.append(" No CLI-related matches") + output.append("") + + # Summary + total_exact = len(exact_match) + total_class = len(new_class_matches) + total_pattern = len(new_pattern_matches) if error_keywords else 0 + total = total_exact + total_class + total_pattern + + output.append("=== SEARCH SUMMARY ===") + output.append(f"Total potential matches: {total}") + output.append(f" - Exact matches: {total_exact}") + output.append(f" - Class matches: {total_class}") + if error_keywords: + output.append(f" - Pattern matches: {total_pattern}") + output.append("") + + return "\n".join(output) + + +def present_recent_runs(workflow: str, limit: int = 10) -> str: + """Get recent workflow run history. + + Args: + workflow: Workflow name + limit: Maximum number of runs to fetch + + Returns: + Formatted runs string + """ + try: + result = subprocess.run( + [ + "gh", "run", "list", + "--workflow", workflow, + "--limit", str(limit), + "--json", "databaseId,conclusion,displayTitle,createdAt" + ], + capture_output=True, + text=True, + check=True + ) + runs = json.loads(result.stdout) if result.stdout else [] + except (subprocess.CalledProcessError, json.JSONDecodeError): + runs = [] + + output = [] + output.append(f"=== RECENT RUNS: {workflow} ===") + output.append("") + + if not runs: + output.append("No recent runs found") + else: + for run in runs: + output.append( + f"{run['databaseId']} | {run['conclusion']} | {run['displayTitle']} | {run['createdAt']}" + ) + + output.append("") + + # Calculate failure rate + if runs: + total = len(runs) + failures = sum(1 for run in runs if run.get('conclusion') == 'failure') + if total > 0: + rate = (failures * 100) // total + output.append(f"Failure rate: {failures}/{total} ({rate}%)") + + return "\n".join(output) + + +def extract_test_name(log_file: Path) -> str: + """Extract test name from log file. + + Args: + log_file: Path to log file + + Returns: + Test name or empty string + """ + log_content = log_file.read_text(encoding='utf-8', errors='ignore') + lines = log_content.split('\n') + + # Try JUnit test + for line in lines: + if "<<< FAILURE!" in line: + match = re.search(r'\[ERROR\] ([^\s]+)', line) + if match: + return match.group(1).split('.')[0] + + # Try E2E test + for line in lines: + if "::error file=" in line: + match = re.search(r'file=([^,]+)', line) + if match: + file_path = match.group(1) + return Path(file_path).stem.replace('.spec', '') + + # Try Postman + for line in lines: + if "Collection" in line and "had failures" in line: + match = re.search(r'Collection ([^\s]+) had failures', line) + if match: + return match.group(1) + + return "" + + +def extract_error_keywords(log_file: Path) -> str: + """Extract error keywords for pattern matching. 
+ + Args: + log_file: Path to log file + + Returns: + Space-separated keywords + """ + log_content = log_file.read_text(encoding='utf-8', errors='ignore').lower() + + keywords = [] + + if "moddate" in log_content or "modification date" in log_content: + keywords.append("modDate") + if "createddate" in log_content or "created date" in log_content or "creationdate" in log_content: + keywords.append("createdDate") + if "race condition" in log_content or "concurrent" in log_content or "synchronization" in log_content: + keywords.append("timing") + if "timeout" in log_content or "timed out" in log_content: + keywords.append("timeout") + if "ordering" in log_content or "order by" in log_content or "sorted" in log_content: + keywords.append("ordering") + if re.search(r'boolean.*flip|expected:.*true.*but was:.*false|expected:.*false.*but was:.*true', log_content): + keywords.append("assertion") + + return " ".join(keywords) + + +def present_workflow_annotations(annotations_file: Path) -> str: + """Present workflow run annotations (syntax errors, validation failures). + + Args: + annotations_file: Path to annotations JSON file + + Returns: + Formatted annotations string + """ + if not annotations_file.exists(): + return "=== WORKFLOW ANNOTATIONS ===\n\nNo annotations file found (run fetch-annotations.py to check for syntax errors)" + + try: + annotations = json.loads(annotations_file.read_text(encoding='utf-8')) + except (json.JSONDecodeError, IOError): + return "=== WORKFLOW ANNOTATIONS ===\n\nError reading annotations file" + + output = [] + output.append("=== WORKFLOW ANNOTATIONS ===") + output.append("") + + if not annotations: + output.append("✅ No workflow syntax errors or validation failures detected") + output.append("") + output.append("All jobs were evaluated normally. If jobs were skipped, it was due to") + output.append("conditional logic (if/needs), not workflow file syntax errors.") + return "\n".join(output) + + # We have annotations - this is critical! + output.append(f"🚨 CRITICAL: Found {len(annotations)} workflow annotation(s)") + output.append("") + output.append("Workflow annotations indicate syntax errors or validation failures in the") + output.append("workflow YAML file. 
These errors prevent jobs from being evaluated and") + output.append("are visible in the GitHub UI but NOT in job logs.") + output.append("") + + # Group by severity + by_level = {} + for annotation in annotations: + level = annotation.get('annotation_level', 'unknown') + if level not in by_level: + by_level[level] = [] + by_level[level].append(annotation) + + # Display failures first (most critical) + for level in ['failure', 'error', 'warning', 'notice', 'unknown']: + if level not in by_level: + continue + + level_annotations = by_level[level] + output.append(f"{'=' * 40}") + output.append(f"{level.upper()} ({len(level_annotations)} annotation(s))") + output.append(f"{'=' * 40}") + output.append("") + + for i, annotation in enumerate(level_annotations, 1): + path = annotation.get('path', 'unknown') + line = annotation.get('start_line', '?') + col = annotation.get('start_column', '?') + end_line = annotation.get('end_line', line) + title = annotation.get('title', 'No title') + message = annotation.get('message', 'No message') + raw_details = annotation.get('raw_details', '') + + output.append(f"Annotation {i}:") + output.append(f" File: {path}") + output.append(f" Location: Line {line}, Col {col}" + (f" - Line {end_line}" if end_line != line else "")) + output.append(f" Title: {title}") + output.append(f" Message: {message}") + + if raw_details: + output.append(f" Details:") + # Indent raw details + for detail_line in raw_details.split('\n')[:10]: # Limit to 10 lines + output.append(f" {detail_line}") + + output.append("") + + output.append("=" * 80) + output.append("IMPACT ANALYSIS") + output.append("=" * 80) + output.append("") + output.append("When workflow syntax errors exist:") + output.append(" • Jobs may be marked as 'skipped' even though they never ran") + output.append(" • The workflow run may show as 'completed' despite not running all jobs") + output.append(" • Error details are ONLY visible via annotations API, not in logs") + output.append(" • Fix the syntax error and re-run the workflow") + output.append("") + + return "\n".join(output) + + +def present_job_state_analysis(jobs_file: Path, annotations_file: Optional[Path] = None) -> str: + """Present analysis of job states (failed/skipped/never-evaluated). 
+ + Args: + jobs_file: Path to jobs JSON file + annotations_file: Optional path to annotations JSON file + + Returns: + Formatted job state analysis string + """ + try: + jobs_data = json.loads(jobs_file.read_text(encoding='utf-8')) + jobs = jobs_data.get('jobs', []) + except (json.JSONDecodeError, IOError): + return "=== JOB STATE ANALYSIS ===\n\nError reading jobs file" + + # Check for annotations + has_syntax_errors = False + if annotations_file and annotations_file.exists(): + try: + annotations = json.loads(annotations_file.read_text(encoding='utf-8')) + has_syntax_errors = len(annotations) > 0 + except (json.JSONDecodeError, IOError): + pass + + output = [] + output.append("=== JOB STATE ANALYSIS ===") + output.append("") + + # Categorize jobs + categorized = { + 'failed': [], + 'skipped': [], + 'cancelled': [], + 'success': [], + 'in_progress': [], + 'queued': [], + 'other': [] + } + + for job in jobs: + conclusion = job.get('conclusion') + status = job.get('status') + job_summary = { + 'name': job.get('name', 'Unknown'), + 'id': job.get('id'), + 'conclusion': conclusion, + 'status': status + } + + if conclusion == 'failure': + categorized['failed'].append(job_summary) + elif conclusion == 'skipped': + categorized['skipped'].append(job_summary) + elif conclusion == 'cancelled': + categorized['cancelled'].append(job_summary) + elif conclusion == 'success': + categorized['success'].append(job_summary) + elif status == 'in_progress': + categorized['in_progress'].append(job_summary) + elif status == 'queued': + categorized['queued'].append(job_summary) + else: + categorized['other'].append(job_summary) + + # Display summary + total_jobs = len(jobs) + output.append(f"Total jobs: {total_jobs}") + output.append("") + + for category, job_list in categorized.items(): + if job_list: + emoji = { + 'failed': '❌', + 'skipped': '⏭️', + 'cancelled': '🚫', + 'success': '✅', + 'in_progress': '⏳', + 'queued': '⏸️', + 'other': '❓' + }.get(category, '•') + + output.append(f"{emoji} {category.upper()}: {len(job_list)}") + for job in job_list: + output.append(f" - {job['name']} (ID: {job['id']})") + output.append("") + + # Analyze skipped jobs in context of syntax errors + if categorized['skipped'] and has_syntax_errors: + output.append("=" * 80) + output.append("⚠️ CRITICAL FINDING: SKIPPED JOBS + WORKFLOW SYNTAX ERRORS") + output.append("=" * 80) + output.append("") + output.append(f"Found {len(categorized['skipped'])} skipped job(s) AND workflow syntax errors.") + output.append("") + output.append("These jobs were likely skipped due to the syntax errors in the workflow file,") + output.append("NOT due to normal conditional logic (if/needs). The workflow syntax error") + output.append("prevented these jobs from being evaluated at all.") + output.append("") + output.append("ACTION REQUIRED:") + output.append("1. Review workflow annotations above for specific syntax errors") + output.append("2. Fix the syntax error in the workflow YAML file") + output.append("3. Re-run the workflow after fixing") + output.append("") + + elif categorized['skipped'] and not has_syntax_errors: + output.append("ℹ️ Jobs were skipped due to normal conditional logic (if/needs),") + output.append(" not workflow syntax errors.") + output.append("") + + return "\n".join(output) + + +def present_complete_diagnostic(log_file: Path, workspace: Optional[Path] = None) -> str: + """Present complete diagnostic package for AI. 
+ + Args: + log_file: Path to log file + workspace: Optional workspace path for checking annotations + + Returns: + Complete diagnostic string + """ + output = [] + output.append("=" * 80) + output.append("COMPLETE DIAGNOSTIC EVIDENCE") + output.append("=" * 80) + output.append("") + + # 0. Check for workflow annotations first (critical for understanding skipped jobs) + if workspace: + annotations_file = workspace / "annotations.json" + jobs_file = workspace / "jobs-detailed.json" + + if annotations_file.exists() or jobs_file.exists(): + output.append("=" * 80) + output.append("STEP 0: WORKFLOW-LEVEL ISSUES (Check First!)") + output.append("=" * 80) + output.append("") + output.append("Checking for workflow syntax errors and job state issues...") + output.append("These issues explain why jobs may have been skipped or never run.") + output.append("") + + if annotations_file.exists(): + output.append(present_workflow_annotations(annotations_file)) + output.append("") + output.append("") + + if jobs_file.exists(): + output.append(present_job_state_analysis(jobs_file, annotations_file if annotations_file.exists() else None)) + output.append("") + output.append("") + + # 1. Failure evidence + output.append(present_failure_evidence(log_file)) + output.append("") + output.append("") + + # 2. First error context + output.append(get_first_error_context(log_file)) + output.append("") + output.append("") + + # 3. Timeline + output.append(get_failure_timeline(log_file)) + output.append("") + output.append("") + + # 4. Known issues + test_name = extract_test_name(log_file) + if test_name: + error_keywords = extract_error_keywords(log_file) + output.append(present_known_issues(test_name, error_keywords)) + + output.append("") + output.append("=" * 80) + output.append("END DIAGNOSTIC EVIDENCE - READY FOR AI ANALYSIS") + output.append("=" * 80) + + return "\n".join(output) + + +def extract_error_sections_only(log_file: Path, output_file: Path) -> None: + """Extract only error sections for large files (performance optimization). + + Args: + log_file: Path to input log file + output_file: Path to output file + """ + log_content = log_file.read_text(encoding='utf-8', errors='ignore') + lines = log_content.split('\n') + + output = [] + output.append("=== ERRORS AND FAILURES ===") + + # Get context around errors + error_lines = [] + for i, line in enumerate(lines): + if any(keyword in line for keyword in ["[ERROR]", "FAILURE!", "::error"]): + start = max(0, i - 20) + end = min(len(lines), i + 21) + error_lines.extend(lines[start:end]) + if len(error_lines) >= 2000: + break + + output.extend(error_lines[:2000]) + output.append("") + output.append("=== FIRST 200 LINES ===") + output.extend(lines[:200]) + output.append("") + output.append("=== LAST 200 LINES ===") + output.extend(lines[-200:]) + + output_file.write_text("\n".join(output), encoding='utf-8') + + +def get_log_stats(log_file: Path) -> str: + """Get log file stats. 
+
+    Args:
+        log_file: Path to log file
+
+    Returns:
+        Stats string
+    """
+    size = log_file.stat().st_size
+    size_mb = size / 1048576
+
+    # Read the file once and reuse the content for all counts
+    log_content = log_file.read_text(encoding='utf-8', errors='ignore')
+    lines = len(log_content.split('\n'))
+    error_count = log_content.count("[ERROR]")
+    failure_count = log_content.count("FAILURE!")
+
+    output = [
+        "=== LOG FILE STATISTICS ===",
+        f"File: {log_file}",
+        f"Size: {size} bytes ({size_mb:.2f} MB)",
+        f"Lines: {lines}",
+        f"Errors: {error_count}",
+        f"Failures: {failure_count}",
+        ""
+    ]
+
+    if size_mb > 10:
+        output.append("⚠️ Large file detected. Consider using extract_error_sections_only() for faster analysis.")
+
+    return "\n".join(output)
+
diff --git a/.claude/skills/cicd-diagnostics/utils/external_issues.py b/.claude/skills/cicd-diagnostics/utils/external_issues.py
new file mode 100644
index 000000000000..a2a6a0664cf5
--- /dev/null
+++ b/.claude/skills/cicd-diagnostics/utils/external_issues.py
@@ -0,0 +1,288 @@
+#!/usr/bin/env python3
+"""External issue detection for CI/CD failures.
+
+Identifies when CI/CD failures are caused by external service changes
+rather than code issues.
+"""
+
+import re
+from datetime import datetime
+from typing import Any, Dict, List, Optional, Tuple
+
+
+def extract_error_indicators(log_content: str) -> Dict[str, List[str]]:
+    """Extract key indicators from logs that suggest external issues.
+
+    Args:
+        log_content: Full log file content
+
+    Returns:
+        Dictionary mapping indicator type to list of matches
+    """
+    indicators = {
+        'npm_errors': [],
+        'docker_errors': [],
+        'auth_errors': [],
+        'network_errors': [],
+        'service_names': set()  # Deduplicated while scanning; converted to a list below
+    }
+
+    lines = log_content.split('\n')
+
+    for line in lines:
+        # NPM specific errors
+        if 'npm ERR!' in line:
+            indicators['npm_errors'].append(line.strip())
+            indicators['service_names'].add('npm')
+
+        # Extract error codes
+        if 'code E' in line:
+            match = re.search(r'code (E\w+)', line)
+            if match:
+                indicators['npm_errors'].append(f"Error code: {match.group(1)}")
+
+        # Docker errors
+        if 'ERROR:' in line and any(keyword in line.lower() for keyword in ['docker', 'blob', 'image', 'registry']):
+            indicators['docker_errors'].append(line.strip())
+            indicators['service_names'].add('docker')
+
+        # Authentication errors (generic)
+        auth_keywords = [
+            'authentication', 'authorization', 'OTP', '2FA', 'token',
+            'ENEEDAUTH', 'EOTP', 'unauthorized', 'forbidden', 'access denied'
+        ]
+        if any(keyword.lower() in line.lower() for keyword in auth_keywords):
+            if any(error in line for error in ['ERR!', 'ERROR:', '::error::', 'FAILURE:']):
+                indicators['auth_errors'].append(line.strip())
+
+        # Network/connectivity errors
+        network_keywords = [
+            'connection refused', 'timeout', 'cannot connect',
+            'network error', 'ECONNREFUSED', 'ETIMEDOUT'
+        ]
+        if any(keyword.lower() in line.lower() for keyword in network_keywords):
+            indicators['network_errors'].append(line.strip())
+
+    # Convert set to list for JSON serialization
+    indicators['service_names'] = list(indicators['service_names'])
+
+    return indicators
+
+
+def generate_search_queries(indicators: Dict[str, List[str]],
+                            failure_date: Optional[str] = None) -> List[str]:
+    """Generate web search queries based on error indicators.
+
+    Args:
+        indicators: Error indicators from extract_error_indicators()
+        failure_date: Date of failure (YYYY-MM-DD format)
+
+    Returns:
+        List of search query strings
+    """
+    queries = []
+
+    # Extract month/year from failure date
+    date_context = ""
+    if failure_date:
+        try:
+            dt = datetime.strptime(failure_date, "%Y-%m-%d")
+            date_context = f"{dt.strftime('%B %Y')}"
+        except ValueError:
+            pass
+
+    # NPM specific searches
+    if indicators['npm_errors']:
+        npm_codes = [line for line in indicators['npm_errors'] if 'Error code:' in line]
+        if npm_codes:
+            # Extract error code
+            for code_line in npm_codes:
+                code = code_line.split('Error code: ')[1]
+                queries.append(f'npm {code} authentication error {date_context}')
+
+        # Check for token/2FA issues
+        if any('OTP' in err or '2FA' in err or 'token' in err.lower()
+               for err in indicators['npm_errors']):
+            queries.append(f'npm classic token revoked {date_context}')
+            queries.append(f'npm 2FA authentication CI/CD {date_context}')
+
+    # Docker specific searches
+    if indicators['docker_errors']:
+        if any('blob' in err.lower() for err in indicators['docker_errors']):
+            queries.append(f'docker blob not found error {date_context}')
+        if any('registry' in err.lower() for err in indicators['docker_errors']):
+            queries.append(f'docker registry authentication {date_context}')
+
+    # GitHub Actions searches
+    if any('actions' in err.lower() for err in
+           indicators['auth_errors'] + indicators['network_errors']):
+        queries.append(f'GitHub Actions runner issues {date_context}')
+
+    # Generic service change searches
+    for service in indicators['service_names']:
+        queries.append(f'{service} breaking changes {date_context}')
+        queries.append(f'{service} security update {date_context}')
+
+    return queries
+
+
+def suggest_external_checks(indicators: Dict[str, List[str]],
+                            failure_timeline: List[Tuple[str, str]]) -> Dict[str, Any]:
+    """Suggest which external sources to check based on failure patterns.
+
+    Args:
+        indicators: Error indicators from extract_error_indicators()
+        failure_timeline: List of (date, status) tuples showing failure history
+
+    Returns:
+        Dictionary with suggested checks and reasoning
+    """
+    suggestions = {
+        'likelihood': 'low',  # low, medium, high
+        'checks': [],
+        'reasoning': []
+    }
+
+    # Check if failures started on a specific date with no recovery
+    if len(failure_timeline) >= 3:
+        recent_failures = [status for _, status in failure_timeline[:5]]
+        if all(status == 'failure' for status in recent_failures):
+            suggestions['likelihood'] = 'medium'
+            suggestions['reasoning'].append(
+                "Multiple consecutive failures suggest external change or persistent issue"
+            )
+
+    # NPM authentication errors strongly suggest external changes
+    if indicators['npm_errors']:
+        if any('EOTP' in err or 'ENEEDAUTH' in err for err in indicators['npm_errors']):
+            suggestions['likelihood'] = 'high'
+            suggestions['checks'].append({
+                'source': 'npm registry changelog',
+                'url': 'https://github.blog/changelog/',
+                'search_for': 'npm security token authentication 2FA'
+            })
+            suggestions['reasoning'].append(
+                "NPM authentication errors (EOTP/ENEEDAUTH) often caused by npm registry policy changes"
+            )
+
+    # Docker authentication/registry errors
+    if indicators['docker_errors'] and indicators['auth_errors']:
+        suggestions['likelihood'] = 'high'
+        suggestions['checks'].append({
+            'source': 'Docker Hub status',
+            'url': 'https://status.docker.com/',
+            'search_for': 'Docker Hub registry authentication'
+        })
+        suggestions['reasoning'].append(
+            "Docker authentication errors may indicate Docker Hub policy changes or outages"
+        )
+
+    # Generic authentication without specific service
+    if indicators['auth_errors'] and not indicators['service_names']:
+        suggestions['checks'].append({
+            'source': 'GitHub Actions status',
+            'url': 'https://www.githubstatus.com/',
+            'search_for': 'GitHub Actions runner authentication'
+        })
+
+    return suggestions
+
+
+def format_external_issue_report(indicators: Dict[str, List[str]],
+                                 search_queries: List[str],
+                                 suggestions: Dict[str, Any]) -> str:
+    """Format external issue detection report for inclusion in diagnosis.
+
+    Args:
+        indicators: Error indicators
+        search_queries: Generated search queries
+        suggestions: Suggested checks
+
+    Returns:
+        Formatted markdown report section
+    """
+    report = []
+
+    report.append("## External Issue Detection\n")
+
+    # Likelihood assessment
+    likelihood_emoji = {
+        'low': '⚪',
+        'medium': '🟡',
+        'high': '🔴'
+    }
+    emoji = likelihood_emoji.get(suggestions['likelihood'], '⚪')
+    report.append(f"**External Cause Likelihood:** {emoji} {suggestions['likelihood'].upper()}\n")
+
+    # Reasoning
+    if suggestions['reasoning']:
+        report.append("**Indicators:**")
+        for reason in suggestions['reasoning']:
+            report.append(f"- {reason}")
+        report.append("")
+
+    # Service-specific errors
+    if indicators['npm_errors']:
+        report.append("**NPM Errors Detected:**")
+        for err in indicators['npm_errors'][:5]:  # Show first 5
+            report.append(f"- `{err}`")
+        report.append("")
+
+    if indicators['docker_errors']:
+        report.append("**Docker Errors Detected:**")
+        for err in indicators['docker_errors'][:3]:
+            report.append(f"- `{err}`")
+        report.append("")
+
+    if indicators['auth_errors']:
+        report.append("**Authentication Errors Detected:**")
+        for err in indicators['auth_errors'][:3]:
+            report.append(f"- `{err}`")
+        report.append("")
+
+    # Recommended searches
+    if search_queries:
+        report.append("**Recommended Web Searches:**")
+        for query in search_queries[:5]:  # Top 5 queries
+            report.append(f"- `{query}`")
+        report.append("")
+
+    # Specific checks
+    if suggestions['checks']:
+        report.append("**Suggested External Checks:**")
+        for check in suggestions['checks']:
+            report.append(f"- **{check['source']}**: {check['url']}")
+            report.append(f"  Search for: `{check['search_for']}`")
+        report.append("")
+
+    return '\n'.join(report)
+
+
+if __name__ == "__main__":
+    # Example usage
+    import sys
+    from pathlib import Path
+
+    if len(sys.argv) < 2:
+        print("Usage: python external_issues.py <log_file>")
+        sys.exit(1)
+
+    log_file = Path(sys.argv[1])
+    if not log_file.exists():
+        print(f"Error: Log file not found: {log_file}")
+        sys.exit(1)
+
+    log_content = log_file.read_text(encoding='utf-8', errors='ignore')
+
+    indicators = extract_error_indicators(log_content)
+    queries = generate_search_queries(indicators, "2025-11-10")
+    suggestions = suggest_external_checks(indicators, [
+        ("2025-11-10", "failure"),
+        ("2025-11-09", "failure"),
+        ("2025-11-08", "failure"),
+        ("2025-11-07", "failure"),
+        ("2025-11-06", "success")
+    ])
+
+    report = format_external_issue_report(indicators, queries, suggestions)
+    print(report)
diff --git a/.claude/skills/cicd-diagnostics/utils/github_api.py b/.claude/skills/cicd-diagnostics/utils/github_api.py
new file mode 100755
index 000000000000..95b749673911
--- /dev/null
+++ b/.claude/skills/cicd-diagnostics/utils/github_api.py
@@ -0,0 +1,507 @@
+#!/usr/bin/env python3
+"""GitHub API Utility Functions for CI/CD Diagnostics.
+
+Provides reusable functions for interacting with GitHub API and CLI.
+"""
+
+import re
+import subprocess
+import json
+from typing import Optional, Dict, Any, List
+from pathlib import Path
+
+
+def extract_run_id(url: str) -> Optional[str]:
+    """Extract run ID from GitHub Actions URL.
+
+    Args:
+        url: GitHub Actions run URL
+
+    Returns:
+        Run ID or None if not found
+    """
+    match = re.search(r'/runs/(\d+)', url)
+    return match.group(1) if match else None
+
+
+def extract_pr_number(input_str: str) -> Optional[str]:
+    """Extract PR number from URL or branch name.
+ + Args: + input_str: PR URL or branch name + + Returns: + PR number or None if not found + """ + # Try pull URL pattern + match = re.search(r'/pull/(\d+)', input_str) + if match: + return match.group(1) + + # Try branch name pattern (issue-123-feature-name) + match = re.search(r'issue-(\d+)', input_str) + if match: + return match.group(1) + + return None + + +def get_run_metadata(run_id: str, output_file: Path) -> None: + """Get workflow run metadata. + + Args: + run_id: GitHub Actions run ID + output_file: Path to save JSON output + """ + result = subprocess.run( + [ + "gh", "run", "view", run_id, + "--json", "conclusion,status,event,headBranch,headSha,workflowName,url,createdAt,updatedAt,displayTitle" + ], + capture_output=True, + text=True, + check=True + ) + output_file.write_text(result.stdout, encoding='utf-8') + + +def get_jobs_detailed(run_id: str, output_file: Path) -> None: + """Get all jobs for a workflow run with detailed step information. + + Args: + run_id: GitHub Actions run ID + output_file: Path to save JSON output + """ + result = subprocess.run( + [ + "gh", "api", + f"/repos/dotCMS/core/actions/runs/{run_id}/jobs", + "--paginate" + ], + capture_output=True, + text=True, + check=True + ) + output_file.write_text(result.stdout, encoding='utf-8') + + +def get_failed_jobs(jobs_file: Path) -> List[Dict[str, Any]]: + """Get failed jobs from detailed jobs file. + + Args: + jobs_file: Path to jobs JSON file + + Returns: + List of failed job dictionaries + """ + jobs_data = json.loads(jobs_file.read_text(encoding='utf-8')) + return [job for job in jobs_data.get('jobs', []) if job.get('conclusion') == 'failure'] + + +def get_canceled_jobs(jobs_file: Path) -> List[Dict[str, Any]]: + """Get canceled jobs from detailed jobs file. + + Args: + jobs_file: Path to jobs JSON file + + Returns: + List of canceled job dictionaries + """ + jobs_data = json.loads(jobs_file.read_text(encoding='utf-8')) + return [job for job in jobs_data.get('jobs', []) if job.get('conclusion') == 'cancelled'] + + +def download_job_logs(job_id: str, output_file: Path) -> None: + """Download logs for a specific job. + + Args: + job_id: GitHub Actions job ID + output_file: Path to save logs + """ + result = subprocess.run( + [ + "gh", "api", + f"/repos/dotCMS/core/actions/jobs/{job_id}/logs" + ], + capture_output=True, + text=True, + check=True + ) + output_file.write_text(result.stdout, encoding='utf-8') + + +def get_pr_info(pr_num: str, output_file: Path) -> None: + """Get PR information including status check rollup. + + Args: + pr_num: PR number + output_file: Path to save JSON output + """ + result = subprocess.run( + [ + "gh", "pr", "view", pr_num, + "--json", "number,headRefOid,headRefName,title,author,statusCheckRollup" + ], + capture_output=True, + text=True, + check=True + ) + output_file.write_text(result.stdout, encoding='utf-8') + + +def find_failed_run_from_pr(pr_info_file: Path) -> Optional[str]: + """Find failed run from PR info. 
+ + Args: + pr_info_file: Path to PR info JSON file + + Returns: + Run ID or None if not found + """ + pr_data = json.loads(pr_info_file.read_text(encoding='utf-8')) + + status_checks = pr_data.get('statusCheckRollup', []) + for check in status_checks: + if (check.get('conclusion') == 'FAILURE' and + check.get('workflowName') == '-1 PR Check'): + details_url = check.get('detailsUrl', '') + return extract_run_id(details_url) + + return None + + +def get_recent_runs(workflow_name: str, limit: int = 20, output_file: Optional[Path] = None) -> List[Dict[str, Any]]: + """Get recent workflow runs. + + Args: + workflow_name: Name of the workflow + limit: Maximum number of runs to fetch + output_file: Optional path to save JSON output + + Returns: + List of run dictionaries + """ + result = subprocess.run( + [ + "gh", "run", "list", + "--workflow", workflow_name, + "--limit", str(limit), + "--json", "databaseId,conclusion,headSha,displayTitle,createdAt" + ], + capture_output=True, + text=True, + check=True + ) + + runs = json.loads(result.stdout) + + if output_file: + output_file.write_text(result.stdout, encoding='utf-8') + + return runs + + +def get_artifacts(run_id: str, output_file: Path) -> None: + """Get artifacts for a workflow run. + + Args: + run_id: GitHub Actions run ID + output_file: Path to save JSON output + """ + result = subprocess.run( + [ + "gh", "api", + f"/repos/dotCMS/core/actions/runs/{run_id}/artifacts", + "--jq", ".artifacts[] | {name, id, size_in_bytes, expired}" + ], + capture_output=True, + text=True, + check=True + ) + output_file.write_text(result.stdout, encoding='utf-8') + + +def search_issues(query: str, output_file: Optional[Path] = None) -> List[Dict[str, Any]]: + """Search for related GitHub issues. + + Args: + query: Search query + output_file: Optional path to save JSON output + + Returns: + List of issue dictionaries + """ + result = subprocess.run( + [ + "gh", "issue", "list", + "--search", query, + "--json", "number,title,state,labels,createdAt", + "--limit", "10" + ], + capture_output=True, + text=True, + check=True + ) + + issues = json.loads(result.stdout) + + if output_file: + output_file.write_text(result.stdout, encoding='utf-8') + + return issues + + +def get_issue(issue_num: str, output_file: Path) -> None: + """Get issue details. + + Args: + issue_num: Issue number + output_file: Path to save JSON output + """ + result = subprocess.run( + [ + "gh", "issue", "view", issue_num, + "--json", "title,body,labels,author" + ], + capture_output=True, + text=True, + check=True + ) + output_file.write_text(result.stdout, encoding='utf-8') + + +def compare_commits(base_sha: str, head_sha: str, output_file: Path) -> None: + """Compare two commits. + + Args: + base_sha: Base commit SHA + head_sha: Head commit SHA + output_file: Path to save JSON output + """ + result = subprocess.run( + [ + "gh", "api", + f"/repos/dotCMS/core/compare/{base_sha}...{head_sha}", + "--jq", ".commits[] | {sha: .sha[:7], message: .commit.message, author: .commit.author.name}" + ], + capture_output=True, + text=True, + check=True + ) + output_file.write_text(result.stdout, encoding='utf-8') + + +def get_prs_for_branch(branch: str, output_file: Path) -> None: + """Get PR list for current branch. 
+ + Args: + branch: Branch name + output_file: Path to save JSON output + """ + result = subprocess.run( + [ + "gh", "pr", "list", + "--head", branch, + "--json", "number,url,headRefOid,title,author" + ], + capture_output=True, + text=True, + check=True + ) + output_file.write_text(result.stdout, encoding='utf-8') + + +def get_runs_for_commit(workflow_name: str, commit_sha: str, limit: int = 5) -> List[Dict[str, Any]]: + """Get workflow runs for specific commit. + + Args: + workflow_name: Name of the workflow + commit_sha: Commit SHA + limit: Maximum number of runs to fetch + + Returns: + List of run dictionaries + """ + result = subprocess.run( + [ + "gh", "run", "list", + "--workflow", workflow_name, + "--commit", commit_sha, + "--limit", str(limit), + "--json", "databaseId,conclusion,status,displayTitle" + ], + capture_output=True, + text=True, + check=True + ) + + return json.loads(result.stdout) + + +def is_macos() -> bool: + """Check if running on macOS.""" + import platform + return platform.system() == "Darwin" + + +def get_workflow_run_annotations(run_id: str, output_file: Optional[Path] = None) -> List[Dict[str, Any]]: + """Get workflow run annotations (syntax errors, validation failures, etc.). + + Annotations include GitHub Actions workflow syntax validation errors that are + shown in the UI but not in job logs. These can indicate why jobs were skipped + or never evaluated. + + Example annotation: + { + "path": ".github/workflows/cicd_6-release.yml", + "start_line": 132, + "end_line": 132, + "start_column": 24, + "end_column": 28, + "annotation_level": "failure", + "title": "Invalid workflow file", + "message": "Unexpected value 'true'", + "raw_details": "..." + } + + Args: + run_id: GitHub Actions run ID + output_file: Optional path to save JSON output + + Returns: + List of annotation dictionaries + """ + try: + # Use gh api to get check runs for the workflow run + # First, get the check suite ID from the run + run_result = subprocess.run( + [ + "gh", "api", + f"/repos/dotCMS/core/actions/runs/{run_id}", + "--jq", ".check_suite_id" + ], + capture_output=True, + text=True, + check=True + ) + check_suite_id = run_result.stdout.strip() + + if not check_suite_id: + return [] + + # Get check runs for the check suite + check_runs_result = subprocess.run( + [ + "gh", "api", + f"/repos/dotCMS/core/check-suites/{check_suite_id}/check-runs", + "--paginate" + ], + capture_output=True, + text=True, + check=True + ) + + check_runs_data = json.loads(check_runs_result.stdout) + + # Collect all annotations from all check runs + all_annotations = [] + for check_run in check_runs_data.get('check_runs', []): + check_run_id = check_run.get('id') + if not check_run_id: + continue + + # Get annotations for this check run + annotations_result = subprocess.run( + [ + "gh", "api", + f"/repos/dotCMS/core/check-runs/{check_run_id}/annotations", + "--paginate" + ], + capture_output=True, + text=True + ) + + if annotations_result.returncode == 0 and annotations_result.stdout.strip(): + try: + annotations = json.loads(annotations_result.stdout) + if isinstance(annotations, list): + all_annotations.extend(annotations) + except json.JSONDecodeError: + continue + + if output_file: + output_file.write_text(json.dumps(all_annotations, indent=2), encoding='utf-8') + + return all_annotations + + except (subprocess.CalledProcessError, json.JSONDecodeError, KeyError) as e: + # Return empty list if annotations cannot be fetched + # This is not a critical failure - annotations may not exist for all runs + return [] 
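+
+
+# Hedged usage sketch (illustration only, not part of the module's API surface):
+# shows how the annotation fetch above composes with the job-state helpers below.
+# The run ID and workspace path are hypothetical, and jobs-detailed.json is
+# assumed to have been written by get_jobs_detailed() first.
+#
+#     from pathlib import Path
+#
+#     workspace = Path(".claude/diagnostics/run-12345")
+#     workspace.mkdir(parents=True, exist_ok=True)
+#
+#     get_jobs_detailed("12345", workspace / "jobs-detailed.json")
+#     annotations = get_workflow_run_annotations("12345", workspace / "annotations.json")
+#     skipped = get_skipped_jobs(workspace / "jobs-detailed.json")
+#
+#     if annotations and skipped:
+#         print(f"{len(skipped)} skipped job(s) may be caused by workflow syntax errors")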
+ + +def get_skipped_jobs(jobs_file: Path) -> List[Dict[str, Any]]: + """Get skipped jobs from detailed jobs file. + + Args: + jobs_file: Path to jobs JSON file + + Returns: + List of skipped job dictionaries + """ + jobs_data = json.loads(jobs_file.read_text(encoding='utf-8')) + return [job for job in jobs_data.get('jobs', []) if job.get('conclusion') == 'skipped'] + + +def categorize_job_states(jobs_file: Path) -> Dict[str, List[Dict[str, Any]]]: + """Categorize jobs by their state. + + Distinguishes between: + - failed: Jobs that ran and failed + - skipped: Jobs that were intentionally skipped (e.g., due to conditions) + - cancelled: Jobs that were cancelled + - never_evaluated: Jobs that never ran due to syntax errors or workflow issues + + Args: + jobs_file: Path to jobs JSON file + + Returns: + Dictionary with categorized jobs + """ + jobs_data = json.loads(jobs_file.read_text(encoding='utf-8')) + jobs = jobs_data.get('jobs', []) + + categorized = { + 'failed': [], + 'skipped': [], + 'cancelled': [], + 'success': [], + 'in_progress': [], + 'queued': [], + 'never_evaluated': [] + } + + for job in jobs: + conclusion = job.get('conclusion') + status = job.get('status') + + if conclusion == 'failure': + categorized['failed'].append(job) + elif conclusion == 'skipped': + categorized['skipped'].append(job) + elif conclusion == 'cancelled': + categorized['cancelled'].append(job) + elif conclusion == 'success': + categorized['success'].append(job) + elif status == 'in_progress': + categorized['in_progress'].append(job) + elif status == 'queued': + categorized['queued'].append(job) + else: + # Job may have been never evaluated if no conclusion and not in progress/queued + if not conclusion and status == 'completed': + categorized['never_evaluated'].append(job) + + return categorized + + diff --git a/.claude/skills/cicd-diagnostics/utils/html_scraper.py b/.claude/skills/cicd-diagnostics/utils/html_scraper.py new file mode 100644 index 000000000000..750134209861 --- /dev/null +++ b/.claude/skills/cicd-diagnostics/utils/html_scraper.py @@ -0,0 +1,350 @@ +""" +HTML Scraper for GitHub Actions Workflow Annotations + +⚠️ WARNING: This module scrapes GitHub's HTML interface to extract workflow-level +validation errors that are not exposed through the official REST API. + +This is a workaround for a known GitHub API limitation where workflow syntax +validation errors are visible in the UI but not accessible programmatically. + +IMPORTANT CAVEATS: +- This is NOT an official API and may break at any time +- GitHub may change their HTML structure without notice +- This should be considered a temporary workaround +- Only use when official API endpoints don't provide the needed data + +Last tested: 2025-12-09 with GitHub Actions UI +""" + +import json +import re +import subprocess +from pathlib import Path +from typing import Dict, List, Optional + + +def scrape_workflow_annotations(run_id: str, owner: str = "dotCMS", repo: str = "core") -> Dict: + """ + Scrape workflow-level annotations from GitHub Actions HTML page. 
+
+    Args:
+        run_id: GitHub Actions run ID
+        owner: Repository owner (default: dotCMS)
+        repo: Repository name (default: core)
+
+    Returns:
+        Dict with structure:
+        {
+            "workflow_annotations": [
+                {
+                    "level": "failure" | "warning" | "notice",
+                    "message": "Error message",
+                    "path": ".github/workflows/...",
+                    "line": 132,
+                    "col": 24
+                }
+            ],
+            "source": "html_scrape",
+            "warning": "This data was scraped from HTML and may be fragile"
+        }
+    """
+    url = f"https://github.com/{owner}/{repo}/actions/runs/{run_id}"
+
+    try:
+        # Fetch HTML directly with curl (gh api doesn't properly handle HTML responses)
+        result = subprocess.run(
+            ["curl", "-s", "-L", url],
+            capture_output=True,
+            text=True,
+            timeout=30
+        )
+
+        if result.returncode != 0:
+            return {
+                "workflow_annotations": [],
+                "error": f"Failed to fetch HTML: {result.stderr}",
+                "source": "html_scrape"
+            }
+
+        html_content = result.stdout
+
+        if not html_content or len(html_content) < 1000:
+            return {
+                "workflow_annotations": [],
+                "error": "HTML content appears invalid or empty",
+                "source": "html_scrape"
+            }
+
+        # Parse annotations from HTML
+        annotations = parse_annotations_from_html(html_content, run_id)
+
+        return {
+            "workflow_annotations": annotations,
+            "source": "html_scrape",
+            "warning": "This data was scraped from HTML and may become invalid if GitHub changes their UI structure",
+            "url": url
+        }
+
+    except subprocess.TimeoutExpired:
+        return {
+            "workflow_annotations": [],
+            "error": "Timeout while fetching HTML",
+            "source": "html_scrape"
+        }
+    except Exception as e:
+        return {
+            "workflow_annotations": [],
+            "error": f"Exception during HTML scraping: {str(e)}",
+            "source": "html_scrape"
+        }
+
+
+def parse_annotations_from_html(html_content: str, run_id: str) -> List[Dict]:
+    """
+    Parse annotation data from GitHub Actions HTML page.
+
+    Expected HTML structure (tag names reconstructed from the selectors used
+    below; they are assumptions and may drift as GitHub changes the UI):
+
+        <annotation-message>
+            <svg class="octicon-...">   (icon class indicates level: failure, warning, notice)
+            <summary>Title (e.g., "-6 Release Process")</summary>
+            <div data-target="annotation-message.annotationContainer">
+                <div>
+                    Full annotation text here
+                </div>
+            </div>
+        </annotation-message>
+
+    Args:
+        html_content: Raw HTML content from GitHub Actions page
+        run_id: Run ID for context in error messages
+
+    Returns:
+        List of annotation dictionaries
+    """
+    annotations = []
+
+    # Look for <annotation-message> blocks (custom element name is an
+    # assumption inferred from the data-target attribute used below)
+    annotation_block_pattern = r'<annotation-message[^>]*>(.*?)</annotation-message>'
+    block_matches = re.finditer(annotation_block_pattern, html_content, re.DOTALL | re.IGNORECASE)
+
+    for block_match in block_matches:
+        block_content = block_match.group(1)
+
+        # Extract annotation title (usually in a <summary> tag)
+        title_pattern = r'<summary[^>]*>(.*?)</summary>'
+        title_match = re.search(title_pattern, block_content, re.DOTALL)
+        title = title_match.group(1).strip() if title_match else "Unknown"
+
+        # Determine level from SVG icon
+        level = 'notice'  # default
+        if 'octicon-x-circle' in block_content or 'octicon-stop' in block_content:
+            level = 'failure'
+        elif 'octicon-alert' in block_content:
+            level = 'warning'
+        elif 'octicon-info' in block_content:
+            level = 'notice'
+
+        # Extract annotation text from inner div
+        container_pattern = (
+            r'<div[^>]*data-target=["\']annotation-message\.annotationContainer["\'][^>]*>'
+            r'\s*<div[^>]*>(.*?)</div>'
+        )
+        container_match = re.search(container_pattern, block_content, re.DOTALL)
+
+        if container_match:
+            annotation_text = container_match.group(1).strip()
+
+            # Skip empty or very short annotations (likely not workflow errors)
+            if len(annotation_text) < 10:
+                continue
+
+            annotation = {
+                "level": level,
+                "title": title,
+                "message": annotation_text
+            }
+
+            # Avoid duplicates (same message)
+            if not any(a.get('message') == annotation_text for a in annotations):
+                annotations.append(annotation)
+
+    return annotations
+
+
+def extract_annotations_from_json(data: any, path: str = "") -> List[Dict]:
+    """
+    Recursively extract annotation data from JSON structures.
+
+    Args:
+        data: JSON data (dict, list, or primitive)
+        path: Current path in JSON structure (for debugging)
+
+    Returns:
+        List of annotation dictionaries found in the JSON
+    """
+    annotations = []
+
+    if isinstance(data, dict):
+        # Check if this dict looks like an annotation
+        if 'annotation_level' in data or 'annotationLevel' in data:
+            annotation = {
+                "level": data.get('annotation_level') or data.get('annotationLevel'),
+                "message": data.get('message') or data.get('title') or '',
+                "path": data.get('path'),
+                "line": data.get('start_line') or data.get('startLine'),
+                "col": data.get('start_column') or data.get('startColumn')
+            }
+            annotations.append(annotation)
+
+        # Check for common annotation array keys
+        for key in ['annotations', 'checkAnnotations', 'errors', 'warnings']:
+            if key in data and isinstance(data[key], list):
+                for item in data[key]:
+                    annotations.extend(extract_annotations_from_json(item, f"{path}.{key}"))
+
+        # Recurse into other dict values
+        for key, value in data.items():
+            if key not in ['annotations', 'checkAnnotations', 'errors', 'warnings']:
+                annotations.extend(extract_annotations_from_json(value, f"{path}.{key}"))
+
+    elif isinstance(data, list):
+        for i, item in enumerate(data):
+            annotations.extend(extract_annotations_from_json(item, f"{path}[{i}]"))
+
+    return annotations
+
+
+def save_scraped_annotations(run_id: str, workspace: Path, annotations_data: Dict):
+    """
+    Save scraped annotations to workspace with appropriate warnings.
+
+    Args:
+        run_id: GitHub Actions run ID
+        workspace: Diagnostic workspace directory
+        annotations_data: Scraped annotations data
+    """
+    output_file = workspace / "workflow-annotations-scraped.json"
+
+    # Add metadata
+    annotations_data['run_id'] = run_id
+    annotations_data['scrape_timestamp'] = subprocess.run(
+        ["date", "-Iseconds"],
+        capture_output=True,
+        text=True
+    ).stdout.strip()
+
+    with open(output_file, 'w') as f:
+        json.dump(annotations_data, f, indent=2)
+
+    print(f"✓ Scraped workflow annotations saved to {output_file}")
+
+    if annotations_data.get('workflow_annotations'):
+        print(f"  Found {len(annotations_data['workflow_annotations'])} workflow-level annotations")
+
+        # Group by level
+        by_level = {}
+        for ann in annotations_data['workflow_annotations']:
+            level = ann.get('level', 'unknown')
+            by_level[level] = by_level.get(level, 0) + 1
+
+        for level, count in sorted(by_level.items()):
+            print(f"    {level}: {count}")
+    else:
+        print("  No workflow-level annotations found in HTML")
+
+    if 'warning' in annotations_data:
+        print(f"\n⚠️ {annotations_data['warning']}")
+
+
+def format_scraped_annotations_report(annotations_data: Dict) -> str:
+    """
+    Format scraped annotations into a human-readable report.
+ + Args: + annotations_data: Scraped annotations data + + Returns: + Formatted report string + """ + report = [] + report.append("=" * 80) + report.append("WORKFLOW-LEVEL ANNOTATIONS (SCRAPED FROM HTML)") + report.append("=" * 80) + + if 'warning' in annotations_data: + report.append(f"\n⚠️ WARNING: {annotations_data['warning']}\n") + + if 'error' in annotations_data: + report.append(f"\n❌ ERROR: {annotations_data['error']}\n") + return "\n".join(report) + + annotations = annotations_data.get('workflow_annotations', []) + + if not annotations: + report.append("\nNo workflow-level annotations found in HTML.") + report.append("This might mean:") + report.append(" • There are no workflow syntax errors") + report.append(" • GitHub changed their HTML structure (scraper needs update)") + report.append(" • The page couldn't be accessed") + return "\n".join(report) + + # Group by level + by_level = {'failure': [], 'warning': [], 'notice': []} + for ann in annotations: + level = ann.get('level', 'notice') + if level not in by_level: + by_level[level] = [] + by_level[level].append(ann) + + # Report failures + if by_level['failure']: + report.append(f"\n❌ ERRORS ({len(by_level['failure'])})") + report.append("-" * 80) + for ann in by_level['failure']: + report.append(f"\n Title: {ann.get('title', 'Unknown')}") + report.append(f" Message:\n {ann['message'][:500]}..." if len(ann['message']) > 500 else f" Message:\n {ann['message']}") + + # Report warnings + if by_level['warning']: + report.append(f"\n⚠️ WARNINGS ({len(by_level['warning'])})") + report.append("-" * 80) + for ann in by_level['warning']: + report.append(f"\n Title: {ann.get('title', 'Unknown')}") + report.append(f" Message:\n {ann['message'][:500]}..." if len(ann['message']) > 500 else f" Message:\n {ann['message']}") + + # Report notices + if by_level['notice']: + report.append(f"\nℹ️ NOTICES ({len(by_level['notice'])})") + report.append("-" * 80) + for ann in by_level['notice']: + report.append(f"\n Title: {ann.get('title', 'Unknown')}") + # Show first few lines of message for notices + message_lines = ann['message'].split('\n') + if len(message_lines) > 4: + preview = '\n '.join(message_lines[:4]) + report.append(f" Message:\n {preview}\n ... 
({len(message_lines) - 4} more lines)") + else: + report.append(f" Message:\n {ann['message']}") + + report.append("\n" + "=" * 80) + report.append(f"Source: {annotations_data.get('url', 'N/A')}") + report.append("=" * 80) + + return "\n".join(report) + + +if __name__ == "__main__": + import sys + + if len(sys.argv) < 2: + print("Usage: python html_scraper.py [WORKSPACE]") + print("\nExample:") + print(" python html_scraper.py 20043196360 /path/to/workspace") + sys.exit(1) + + run_id = sys.argv[1] + workspace = Path(sys.argv[2]) if len(sys.argv) > 2 else Path(f"./.claude/diagnostics/run-{run_id}") + workspace.mkdir(parents=True, exist_ok=True) + + print(f"Scraping workflow annotations for run {run_id}...") + print(f"⚠️ WARNING: This uses HTML scraping and may break if GitHub changes their UI\n") + + annotations_data = scrape_workflow_annotations(run_id) + save_scraped_annotations(run_id, workspace, annotations_data) + + print("\n" + format_scraped_annotations_report(annotations_data)) \ No newline at end of file diff --git a/.claude/skills/cicd-diagnostics/utils/tiered_extraction.py b/.claude/skills/cicd-diagnostics/utils/tiered_extraction.py new file mode 100755 index 000000000000..764ed567b6aa --- /dev/null +++ b/.claude/skills/cicd-diagnostics/utils/tiered_extraction.py @@ -0,0 +1,597 @@ +#!/usr/bin/env python3 +"""Tiered Evidence Extraction. + +Creates multiple levels of detail for progressive analysis. +""" + +import re +from pathlib import Path +from typing import List + + +def extract_level1_summary(log_file: Path, output_file: Path) -> None: + """Level 1: Test Summary Only (ALWAYS fits in context - ~500 tokens max). + + Purpose: Quick overview of what failed + + Args: + log_file: Path to log file + output_file: Path to output file + """ + log_content = log_file.read_text(encoding='utf-8', errors='ignore') + lines = log_content.split('\n') + + output = [] + output.append("=" * 80) + output.append("LEVEL 1: TEST SUMMARY (Quick Overview)") + output.append("=" * 80) + output.append("") + + # Overall test results + output.append("=== OVERALL TEST RESULTS ===") + test_results = [ + line for line in lines + if "Tests run:" in line and ("Failures:" in line or "Errors:" in line) or "BUILD SUCCESS" in line or "BUILD FAILURE" in line + ][-5:] + output.extend(test_results) + output.append("") + + # List of failed tests + output.append("=== FAILED TESTS (Names Only) ===") + failed_tests = [] + for line in lines: + if "[ERROR]" in line and "Test." 
in line:
+            match = re.search(r'\[ERROR\] ([^\s]+)', line)
+            if match:
+                failed_tests.append(match.group(1))
+    output.extend(list(set(failed_tests))[:20])
+    output.append("")
+
+    # Retry patterns
+    output.append("=== RETRY PATTERNS ===")
+    has_retries = any("Run " in line and ":" in line for line in lines)
+    if has_retries:
+        output.append("Tests were retried (Surefire rerunFailingTestsCount active)")
+        retry_lines = [
+            line for line in lines
+            if "[ERROR]" in line or ("Run " in line and ":" in line)
+        ][:15]
+        output.extend(retry_lines)
+        output.append("")
+        flake_lines = [
+            line for line in lines
+            if "[WARNING]" in line or ("Run " in line and ":" in line)
+        ][:15]
+        output.extend(flake_lines)
+    else:
+        output.append("No retry patterns detected")
+    output.append("")
+
+    # Quick classification hints
+    output.append("=== CLASSIFICATION HINTS ===")
+    log_lower = log_content.lower()
+    has_timeout = "timeout" in log_lower or "conditiontimeout" in log_lower
+    has_assertion = "assertionerror" in log_lower or ("expected:" in log_lower and "but was:" in log_lower)
+    has_npe = "nullpointerexception" in log_lower
+    has_infra = any(kw in log_lower for kw in ["connection refused", "docker", "failed", "container", "error"])
+
+    output.append(f"Timeout errors: {'yes' if has_timeout else 'no'}")
+    output.append(f"Assertion errors: {'yes' if has_assertion else 'no'}")
+    output.append(f"NullPointerException: {'yes' if has_npe else 'no'}")
+    output.append(f"Infrastructure errors: {'yes' if has_infra else 'no'}")
+    output.append("")
+
+    output.append("=" * 80)
+    output.append("Use extract_level2_unique_failures() for detailed error messages")
+    output.append("=" * 80)
+
+    output_file.write_text("\n".join(output), encoding='utf-8')
+
+
+def extract_level2_unique_failures(log_file: Path, output_file: Path) -> None:
+    """Level 2: Unique Failures (Moderate detail - ~5000 tokens max).
+
+    Purpose: First occurrence of each unique failure with error messages
+
+    Args:
+        log_file: Path to log file
+        output_file: Path to output file
+    """
+    log_content = log_file.read_text(encoding='utf-8', errors='ignore')
+    lines = log_content.split('\n')
+
+    output = []
+    output.append("=" * 80)
+    output.append("LEVEL 2: UNIQUE FAILURES (Detailed Error Messages)")
+    output.append("=" * 80)
+    output.append("")
+
+    # Parse retry summary
+    output.append("=== DETERMINISTIC FAILURES (Failed All Retries) ===")
+    if "Errors:" in log_content:
+        # Extract error section
+        error_start = None
+        for i, line in enumerate(lines):
+            if "[ERROR] Errors:" in line:
+                error_start = i
+                break
+
+        if error_start is not None:
+            error_section = lines[error_start:error_start + 50]
+            output.extend(error_section[:100])
+    else:
+        output.append("No deterministic failures detected")
+    output.append("")
+
+    output.append("=== FLAKY FAILURES (Passed Some Retries) ===")
+    if "Flakes:" in log_content:
+        flake_start = None
+        for i, line in enumerate(lines):
+            if "[WARNING] Flakes:" in line:
+                flake_start = i
+                break
+
+        if flake_start is not None:
+            flake_section = lines[flake_start:flake_start + 50]
+            output.extend(flake_section[:100])
+    else:
+        output.append("No flaky tests detected")
+    output.append("")
+
+    # Get first occurrence of each unique error message
+    output.append("=== UNIQUE ERROR MESSAGES (First Occurrence) ===")
+
+    # ConditionTimeoutException
+    if "ConditionTimeoutException" in log_content:
+        output.append("--- Awaitility Timeout ---")
+        for i, line in enumerate(lines):
+            if "ConditionTimeoutException" in line:
+                start = max(0, i - 5)
+                end = min(len(lines), i + 16)
+                output.extend(lines[start:end])
+                break  # first occurrence only
+        output.append("")
+
+    # AssertionError / ComparisonFailure
+    if "AssertionError" in log_content or "ComparisonFailure" in log_content:
+        output.append("--- Assertion Failures ---")
+        for i, line in enumerate(lines):
+            if "AssertionError" in line or "ComparisonFailure" in line:
+                start = max(0, i - 3)
+                end = min(len(lines), i + 11)
+                output.extend(lines[start:end])
+                break  # first occurrence only
+        output.append("")
+
+    # NullPointerException
+    if "NullPointerException" in log_content:
+        output.append("--- NullPointerException ---")
+        for i, line in enumerate(lines):
+            if "NullPointerException" in line:
+                start = max(0, i - 5)
+                end = min(len(lines), i + 11)
+                output.extend(lines[start:end])
+                break  # first occurrence only
+        output.append("")
+
+    # Other exceptions
+    output.append("--- Other Exceptions (First 3) ---")
+    exception_count = 0
+    for i, line in enumerate(lines):
+        if "Exception:" in line and "ConditionTimeout" not in line and "AssertionError" not in line and "NullPointer" not in line:
+            start = max(0, i - 3)
+            end = min(len(lines), i + 9)
+            output.extend(lines[start:end])
+            exception_count += 1
+            if exception_count >= 3:
+                break
+    output.append("")
+
+    output.append("=" * 80)
+    output.append("Use extract_level3_full_context() for complete stack traces and timing")
+    output.append("=" * 80)
+
+    output_file.write_text("\n".join(output), encoding='utf-8')
+
+
+def extract_level3_full_context(log_file: Path, output_file: Path) -> None:
+    """Level 3: Full Context (Comprehensive - ~15000 tokens max).
+ + Purpose: Complete stack traces, timing correlation, all retry attempts + + Args: + log_file: Path to log file + output_file: Path to output file + """ + log_content = log_file.read_text(encoding='utf-8', errors='ignore') + lines = log_content.split('\n') + + output = [] + output.append("=" * 80) + output.append("LEVEL 3: FULL CONTEXT (Complete Details)") + output.append("=" * 80) + output.append("") + + # Complete retry analysis + output.append("=== COMPLETE RETRY ANALYSIS ===") + results_start = None + for i, line in enumerate(lines): + if "[INFO] Results:" in line: + results_start = i + break + + if results_start is not None: + output.extend(lines[results_start:results_start + 300]) + output.append("") + + # All error sections with full stack traces + output.append("=== ALL ERROR SECTIONS WITH STACK TRACES ===") + error_contexts = [] + for i, line in enumerate(lines): + if "[ERROR]" in line and "Test." in line: + start = max(0, i - 10) + end = min(len(lines), i + 31) + error_contexts.extend(lines[start:end]) + if len(error_contexts) >= 500: + break + output.extend(error_contexts[:500]) + output.append("") + + # Timing correlation + output.append("=== TIMING CORRELATION ===") + timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}') + timing_lines = [ + line for line in lines + if timestamp_pattern.match(line) and ("ERROR" in line or "FAILURE" in line or "Exception" in line) + ][:100] + output.extend(timing_lines) + output.append("") + + # Infrastructure events + output.append("=== INFRASTRUCTURE EVENTS ===") + infra_keywords = ["docker", "container", "elasticsearch", "database", "connection"] + infra_lines = [ + line for line in lines + if any(kw.lower() in line.lower() for kw in infra_keywords) and + any(kw in line.lower() for kw in ["error", "failed", "refused", "timeout"]) + ][:50] + output.extend(infra_lines) + output.append("") + + output.append("=" * 80) + output.append("This is the most detailed extraction level available") + output.append("=" * 80) + + output_file.write_text("\n".join(output), encoding='utf-8') + + +def extract_failed_test_names(log_file: Path) -> List[str]: + """Extract failed test names. + + Args: + log_file: Path to log file + + Returns: + List of test names + """ + log_content = log_file.read_text(encoding='utf-8', errors='ignore') + lines = log_content.split('\n') + + test_names = set() + + # E2E test names + for line in lines: + if "::error file=" in line: + match = re.search(r'file=([^,]+)', line) + if match: + file_path = match.group(1) + test_name = Path(file_path).stem.replace('.spec', '') + test_names.add(test_name) + + # JUnit/Maven test names + for line in lines: + if "<<< FAILURE!" in line: + match = re.search(r'\[ERROR\] ([^\s]+)', line) + if match: + test_names.add(match.group(1)) + + # Postman collection failures + for line in lines: + if "Collection" in line and "had failures" in line: + match = re.search(r'Collection ([^\s]+) had failures', line) + if match: + test_names.add(match.group(1)) + + return sorted(test_names) + + +def extract_postman_failures(log_file: Path, output_file: Path) -> None: + """Extract Postman test failures with full details. 
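+
+    Example (illustrative paths; expects Newman console output in the log)::
+
+        extract_postman_failures(Path("postman-job.log"), Path("postman-failures.txt"))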
+ + Purpose: Parse Postman/Newman test output for API test failures + + Args: + log_file: Path to log file + output_file: Path to output file + """ + log_content = log_file.read_text(encoding='utf-8', errors='ignore') + lines = log_content.split('\n') + + output = [] + output.append("=" * 80) + output.append("POSTMAN/NEWMAN TEST FAILURES") + output.append("=" * 80) + output.append("") + + # Find test summary + output.append("=== TEST SUMMARY ===") + for i, line in enumerate(lines): + if re.search(r'│\s+(executed|iterations|requests|test-scripts)', line): + output.append(line) + # Get surrounding lines for context + if i + 1 < len(lines) and '│' in lines[i + 1]: + continue + output.append("") + + # Find collection that failed + output.append("=== FAILED COLLECTIONS ===") + for line in lines: + if "Collection" in line and "had failures" in line: + output.append(line) + output.append("") + + # Extract individual failure details + output.append("=== FAILURE DETAILS ===") + in_failure_section = False + failure_count = 0 + + for i, line in enumerate(lines): + # Start of failure section + if re.search(r'\[INFO\]\s+#\s+failure\s+detail', line): + in_failure_section = True + output.append(line) + continue + + # In failure section + if in_failure_section: + # Individual failure entry + if re.search(r'\[INFO\]\s+\d+\.\s+(AssertionError|AssertionFailure|Error)', line): + failure_count += 1 + output.append("") + output.append(f"--- Failure #{failure_count} ---") + + # Extract failure details (next 10 lines) + for j in range(i, min(i + 12, len(lines))): + output.append(lines[j]) + if lines[j].strip() == "" or (j > i and re.search(r'\[INFO\]\s+\d+\.', lines[j])): + break + + # End of failure section + if "Collection" in line and "had failures" in line: + in_failure_section = False + break + + if failure_count >= 10: # Limit to first 10 failures + output.append("") + output.append("(Additional failures truncated...)") + break + + if failure_count == 0: + output.append("No Postman failures detected") + output.append("") + + # Extract test names from failure section + output.append("=== FAILED TEST NAMES ===") + failed_tests = set() + for line in lines: + # Pattern: inside "Collection Name / Test Name / Sub Test" + match = re.search(r'inside "(([^"]+) / ([^"]+))"', line) + if match: + failed_tests.add(match.group(1)) + + if failed_tests: + for test in sorted(failed_tests): + output.append(f" • {test}") + else: + output.append(" None found") + output.append("") + + output.append("=" * 80) + output.append(f"Total Postman Failures Extracted: {failure_count}") + output.append("=" * 80) + + output_file.write_text("\n".join(output), encoding='utf-8') + + +def auto_extract_tiered(log_file: Path, workspace: Path) -> None: + """Auto-tiered extraction (chooses appropriate level based on log size). 
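+
+    Example (hypothetical run ID and file names)::
+
+        ws = Path(".claude/diagnostics/run-12345")
+        auto_extract_tiered(ws / "job-log.txt", ws)
+        # Always writes evidence-level1-summary.txt and evidence-level2-unique.txt;
+        # evidence-level3-full.txt is added only when the log exceeds 5 MB.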
+ + Args: + log_file: Path to log file + workspace: Workspace directory + """ + size = log_file.stat().st_size + size_mb = size / 1048576 + + print("=== Auto-Tiered Extraction ===") + print(f"Log size: {size_mb:.2f} MB") + print("") + + # Always create Level 1 + print("Creating Level 1 (Summary)...") + level1_file = workspace / "evidence-level1-summary.txt" + extract_level1_summary(log_file, level1_file) + l1_size = level1_file.stat().st_size + print(f"✓ Level 1 created: {l1_size} bytes (~{l1_size // 4} tokens)") + print("") + + # Create Level 2 + print("Creating Level 2 (Unique Failures)...") + level2_file = workspace / "evidence-level2-unique.txt" + extract_level2_unique_failures(log_file, level2_file) + l2_size = level2_file.stat().st_size + print(f"✓ Level 2 created: {l2_size} bytes (~{l2_size // 4} tokens)") + print("") + + # Create Level 3 only if needed + if size_mb > 5: + print("Creating Level 3 (Full Context) - large log detected...") + level3_file = workspace / "evidence-level3-full.txt" + extract_level3_full_context(log_file, level3_file) + l3_size = level3_file.stat().st_size + print(f"✓ Level 3 created: {l3_size} bytes (~{l3_size // 4} tokens)") + else: + print("Skipping Level 3 (log is small enough for Level 2 analysis)") + print("") + + print("=== Tiered Extraction Complete ===") + print("Analysis workflow:") + print("1. Read Level 1 for quick overview and classification hints") + print("2. Read Level 2 for detailed error messages and retry patterns") + print("3. Read Level 3 (if exists) for deep dive analysis") + print("") + + +def analyze_retry_patterns(log_file: Path) -> str: + """Analyze retry patterns (deterministic vs flaky). + + Args: + log_file: Path to log file + + Returns: + Analysis string + """ + log_content = log_file.read_text(encoding='utf-8', errors='ignore') + lines = log_content.split('\n') + + output = [] + output.append("=" * 80) + output.append("RETRY PATTERN ANALYSIS") + output.append("=" * 80) + output.append("") + + # Check if retries are enabled + has_retries = any("Run " in line and ":" in line for line in lines) + if not has_retries: + output.append("No retry patterns detected (Surefire rerunFailingTestsCount not enabled)") + return "\n".join(output) + + output.append("Surefire retry mechanism detected") + output.append("") + + # Parse errors (deterministic failures) + output.append("=== DETERMINISTIC FAILURES (All Retries Failed) ===") + + error_section_start = None + for i, line in enumerate(lines): + if "[ERROR] Errors:" in line: + error_section_start = i + break + + if error_section_start is not None: + # Extract error section until flakes section + error_section = [] + for i in range(error_section_start, min(len(lines), error_section_start + 100)): + line = lines[i] + if "[WARNING] Flakes:" in line: + break + error_section.append(line) + + # Find test names + test_names = set() + for line in error_section: + if "[ERROR]" in line and "com." 
in line and "Run " not in line:
+                match = re.search(r'\[ERROR\]\s+([^\s]+)', line)
+                if match:
+                    test_names.add(match.group(1))
+
+        if test_names:
+            for test in sorted(test_names):
+                test_simple = test.split('.')[-1]
+                retry_count = sum(1 for line in error_section if "Run " in line and test_simple in line)
+                if retry_count == 0:
+                    output.append(f"  • {test} - Failed on first attempt (no per-attempt 'Run N:' lines found)")
+                else:
+                    output.append(f"  • {test} - Failed all {retry_count} recorded attempts (100% failure rate)")
+        else:
+            output.append("  None")
+    else:
+        output.append("  None")
+    output.append("")
+
+    # Parse flakes (intermittent failures)
+    output.append("=== FLAKY TESTS (Passed Some Retries) ===")
+
+    flake_section_start = None
+    for i, line in enumerate(lines):
+        if "[WARNING] Flakes:" in line:
+            flake_section_start = i
+            break
+
+    if flake_section_start is not None:
+        flake_section = lines[flake_section_start:flake_section_start + 200]
+
+        # Find test names
+        test_names = set()
+        for line in flake_section:
+            if "[WARNING]" in line and "com." in line:
+                match = re.search(r'\[WARNING\]\s+([^\s]+)', line)
+                if match:
+                    test_names.add(match.group(1))
+
+        if test_names:
+            for test in sorted(test_names):
+                test_simple = test.split('.')[-1]
+                # Find section for this test
+                test_section = []
+                in_test = False
+                for line in flake_section:
+                    if f"[WARNING] {test}" in line:
+                        in_test = True
+                    if in_test:
+                        test_section.append(line)
+                        if line.strip() == "" or ("[INFO]" in line and "[WARNING]" not in line):
+                            break
+
+                pass_count = sum(1 for line in test_section if "PASS" in line)
+                error_count = sum(1 for line in test_section if "[ERROR]" in line and "Run " in line)
+                total_runs = pass_count + error_count
+
+                if total_runs > 0:
+                    failure_rate = (error_count * 100) // total_runs
+                    output.append(f"  • {test} - Failed {error_count}/{total_runs} retries ({failure_rate}% failure rate, {pass_count} passed)")
+                else:
+                    output.append(f"  • {test} - Unable to parse retry counts")
+        else:
+            output.append("  None")
+    else:
+        output.append("  None")
+    output.append("")
+
+    # Summary statistics (guard with "is not None": index 0 is a valid section start)
+    error_count = sum(1 for line in error_section if "[ERROR]" in line and "com." in line and "Run " not in line) if error_section_start is not None else 0
+    flake_count = sum(1 for line in flake_section if "[WARNING]" in line and "com." in line) if flake_section_start is not None else 0
+
+    output.append("=== SUMMARY ===")
+    output.append(f"Deterministic failures: {error_count} test(s)")
+    output.append(f"Flaky tests: {flake_count} test(s)")
+    output.append(f"Total problematic tests: {error_count + flake_count}")
+    output.append("")
+
+    # Classification guidance
+    if error_count > 0:
+        output.append(f"⚠️  BLOCKING: {error_count} deterministic failure(s) detected")
+        output.append("   These tests fail consistently and indicate real bugs or incomplete fixes")
+
+    if flake_count > 0:
+        output.append(f"⚠️  WARNING: {flake_count} flaky test(s) detected")
+        output.append("   These tests pass sometimes, indicating timing/concurrency issues")
+    output.append("")
+
+    output.append("=" * 80)
+
+    return "\n".join(output)
+
diff --git a/.claude/skills/cicd-diagnostics/utils/workspace.py b/.claude/skills/cicd-diagnostics/utils/workspace.py
new file mode 100755
index 000000000000..aaf29cf5884a
--- /dev/null
+++ b/.claude/skills/cicd-diagnostics/utils/workspace.py
@@ -0,0 +1,270 @@
+#!/usr/bin/env python3
+"""Diagnostic Workspace Management Utilities.
+
+Handles creation, caching, and organization of diagnostic artifacts.
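+
+Typical flow (run ID is illustrative)::
+
+    ws = get_diagnostic_workspace("12345")   # reuses a cached workspace if present
+    save_artifact(ws, "analysis-notes.txt", "...")
+    ensure_gitignore_diagnostics()           # keep diagnostic output out of git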
+""" + +import os +import subprocess +import stat +from pathlib import Path +from typing import Optional + + +def get_repo_root() -> Path: + """Get repository root (works from any subdirectory).""" + try: + result = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + capture_output=True, + text=True, + check=True + ) + return Path(result.stdout.strip()) + except (subprocess.CalledProcessError, FileNotFoundError): + return Path(".").resolve() + + +def create_diagnostic_workspace(run_id: str) -> Path: + """Create diagnostic workspace (no timestamp - reusable by run ID). + + Args: + run_id: GitHub Actions run ID + + Returns: + Path to diagnostic directory + """ + repo_root = get_repo_root() + diagnostic_dir = repo_root / ".claude" / "diagnostics" / f"run-{run_id}" + diagnostic_dir.mkdir(parents=True, exist_ok=True) + return diagnostic_dir + + +def find_existing_diagnostic(run_id: str) -> Optional[Path]: + """Find existing diagnostic workspace for a run ID. + + Args: + run_id: GitHub Actions run ID + + Returns: + Path to existing directory or None + """ + repo_root = get_repo_root() + diagnostic_dir = repo_root / ".claude" / "diagnostics" / f"run-{run_id}" + + if diagnostic_dir.exists() and diagnostic_dir.is_dir(): + return diagnostic_dir + return None + + +def get_diagnostic_workspace(run_id: str, force_clean: bool = False) -> Path: + """Get or create diagnostic workspace (with caching). + + Args: + run_id: GitHub Actions run ID + force_clean: If True, remove existing workspace and start fresh + + Returns: + Path to diagnostic directory (existing or new) + """ + repo_root = get_repo_root() + diagnostic_dir = repo_root / ".claude" / "diagnostics" / f"run-{run_id}" + + # Clean existing workspace if requested + if force_clean and diagnostic_dir.exists(): + print(f"🗑️ Cleaning existing workspace: {diagnostic_dir}", file=os.sys.stderr) + import shutil + shutil.rmtree(diagnostic_dir) + + if diagnostic_dir.exists(): + print(f"✓ Reusing existing diagnostic workspace: {diagnostic_dir}", file=os.sys.stderr) + print(" (Cached logs and metadata will be reused)", file=os.sys.stderr) + return diagnostic_dir + else: + diagnostic_dir.mkdir(parents=True, exist_ok=True) + print(f"✓ Created new diagnostic workspace: {diagnostic_dir}", file=os.sys.stderr) + return diagnostic_dir + + +def save_artifact(diagnostic_dir: Path, filename: str, content: str) -> None: + """Save artifact to diagnostic workspace. + + Args: + diagnostic_dir: Diagnostic workspace directory + filename: Name of the file to save + content: Content to write + """ + artifact_path = diagnostic_dir / filename + artifact_path.write_text(content, encoding='utf-8') + + +def artifact_exists(diagnostic_dir: Path, filename: str) -> bool: + """Check if artifact exists in workspace. + + Args: + diagnostic_dir: Diagnostic workspace directory + filename: Name of the file to check + + Returns: + True if exists and non-empty, False otherwise + """ + artifact_path = diagnostic_dir / filename + return artifact_path.exists() and artifact_path.stat().st_size > 0 + + +def get_or_fetch_artifact(diagnostic_dir: Path, filename: str, fetch_command: list) -> Path: + """Get cached artifact or fetch new. 
+
+    Args:
+        diagnostic_dir: Diagnostic workspace directory
+        filename: Name of the artifact file
+        fetch_command: Command to run if artifact doesn't exist (list of args)
+
+    Returns:
+        Path to artifact file
+    """
+    artifact_path = diagnostic_dir / filename
+
+    if artifact_exists(diagnostic_dir, filename):
+        print(f"✓ Using cached artifact: {filename}", file=sys.stderr)
+        return artifact_path
+    else:
+        print(f"Fetching {filename}...", file=sys.stderr)
+        result = subprocess.run(
+            fetch_command,
+            capture_output=True,
+            text=True,
+            check=True
+        )
+        artifact_path.write_text(result.stdout, encoding='utf-8')
+        print(f"✓ Saved to: {artifact_path}", file=sys.stderr)
+        return artifact_path
+
+
+def ensure_gitignore_diagnostics() -> None:
+    """Ensure .gitignore includes diagnostic directories."""
+    repo_root = get_repo_root()
+    gitignore_path = repo_root / ".gitignore"
+
+    gitignore_content = ""
+    if gitignore_path.exists():
+        gitignore_content = gitignore_path.read_text(encoding='utf-8')
+
+    if ".claude/diagnostics/" not in gitignore_content:
+        gitignore_content += "\n# Claude Code diagnostic outputs\n.claude/diagnostics/\n"
+        gitignore_path.write_text(gitignore_content, encoding='utf-8')
+        print("✓ Added .claude/diagnostics/ to .gitignore", file=sys.stderr)
+
+
+def list_diagnostic_workspaces() -> List[Path]:  # typing.List keeps python>=3.8 support
+    """List all diagnostic workspaces.
+
+    Returns:
+        List of workspace paths, sorted by name (most recent first)
+    """
+    repo_root = get_repo_root()
+    diagnostics_dir = repo_root / ".claude" / "diagnostics"
+
+    if not diagnostics_dir.exists():
+        return []
+
+    workspaces = [
+        p for p in diagnostics_dir.iterdir()
+        if p.is_dir() and p.name.startswith("run-")
+    ]
+    return sorted(workspaces, reverse=True)
+
+
+def get_workspace_age(diagnostic_dir: Path) -> int:
+    """Get workspace age in hours.
+
+    Args:
+        diagnostic_dir: Diagnostic workspace directory
+
+    Returns:
+        Age in hours, or -1 if directory doesn't exist
+    """
+    if not diagnostic_dir.exists():
+        return -1
+
+    # Compare the directory's mtime against the current time; comparing two
+    # reads of the same mtime would always report an age of 0.
+    dir_timestamp = diagnostic_dir.stat().st_mtime
+    age_seconds = time.time() - dir_timestamp
+    age_hours = int(age_seconds / 3600)
+
+    return age_hours
+
+
+def clean_old_diagnostics(max_age_hours: int = 168, max_count: int = 50) -> int:
+    """Clean old diagnostic workspaces.
+
+    Args:
+        max_age_hours: Maximum age in hours (default: 168 = 7 days)
+        max_count: Maximum number to keep (default: 50)
+
+    Returns:
+        Number of workspaces removed
+    """
+    print(f"Cleaning diagnostic workspaces older than {max_age_hours} hours...", file=sys.stderr)
+
+    workspaces = list_diagnostic_workspaces()
+    removed = 0
+
+    for i, workspace in enumerate(workspaces, 1):
+        age = get_workspace_age(workspace)
+
+        if age >= max_age_hours or i > max_count:
+            print(f"  Removing: {workspace} (age: {age}h)", file=sys.stderr)
+            shutil.rmtree(workspace)
+            removed += 1
+
+    print(f"✓ Cleaned {removed} old diagnostic workspace(s)", file=sys.stderr)
+    return removed
+
+
+def get_workspace_summary(diagnostic_dir: Path) -> str:
+    """Get workspace summary.
+
+    Args:
+        diagnostic_dir: Diagnostic workspace directory
+
+    Returns:
+        Summary string
+    """
+    if not diagnostic_dir.exists():
+        return f"Workspace not found: {diagnostic_dir}"
+
+    age = get_workspace_age(diagnostic_dir)
+    # Sum the sizes of files in the workspace; shutil.disk_usage() reports
+    # usage for the whole filesystem, not for this directory.
+    size = sum(f.stat().st_size for f in diagnostic_dir.iterdir() if f.is_file())
+
+    lines = [
+        "=== Diagnostic Workspace Summary ===",
+        f"Path: {diagnostic_dir}",
+        f"Age: {age} hours",
+        f"Size: {size} bytes",
+        "Files:"
+    ]
+
+    for file_path in sorted(diagnostic_dir.iterdir()):
+        if file_path.is_file():
+            size_str = f"{file_path.stat().st_size:,} bytes"
+            lines.append(f"  {file_path.name:<40} {size_str:>10}")
+
+    return "\n".join(lines)
+
+
+def init_diagnostic_structure(diagnostic_dir: Path) -> None:
+    """Create standard diagnostic file structure.
+
+    Args:
+        diagnostic_dir: Diagnostic workspace directory
+    """
+    diagnostic_dir.mkdir(parents=True, exist_ok=True)
+    (diagnostic_dir / "error-summary.txt").touch()
+    (diagnostic_dir / "analysis-notes.txt").touch()
+
+    print(f"✓ Initialized diagnostic structure in {diagnostic_dir}", file=sys.stderr)
+
diff --git a/.claude/skills/sdk-analytics/SKILL.md b/.claude/skills/sdk-analytics/SKILL.md
new file mode 100644
index 000000000000..d6f04c8afe05
--- /dev/null
+++ b/.claude/skills/sdk-analytics/SKILL.md
@@ -0,0 +1,959 @@
+---
+name: SDK Analytics Installer
+description: Use this skill when the user asks to install, configure, or set up @dotcms/analytics, sdk-analytics, analytics SDK, add analytics tracking, or mentions installing analytics in Next.js or React projects
+allowed-tools: Read, Write, Edit, Bash, Grep, Glob
+version: 1.0.0
+---
+
+# DotCMS SDK Analytics Installation Guide
+
+This skill provides step-by-step instructions for installing and configuring the `@dotcms/analytics` SDK in the Next.js example project at `/core/examples/nextjs`.
+
+## Overview
+
+The `@dotcms/analytics` SDK is dotCMS's official JavaScript library for tracking content-aware events and analytics. It provides:
+
+- Automatic page view tracking
+- Conversion tracking (purchases, downloads, sign-ups, etc.)
+- Custom event tracking
+- Session management (30-minute timeout)
+- Anonymous user identity tracking
+- UTM campaign parameter tracking
+- Event batching/queuing for performance
+
+## 🚨 Important: Understanding the Analytics Components
+
+**CRITICAL**: `useContentAnalytics()` **ALWAYS requires config as a parameter**. The hook does NOT use React Context.
+
+### Component Roles
+
+1. **`<DotContentAnalytics />`** - Auto Page View Tracker
+
+   - Only purpose: Automatically track pageviews on route changes
+   - **NOT a React Context Provider**
+   - Does **NOT** provide config to child components
+   - Place in root layout for automatic pageview tracking
+
+2. **`useContentAnalytics(config)`** - Manual Tracking Hook
+   - Used for custom event tracking
+   - **ALWAYS requires config parameter**
+   - Import centralized config in each component that uses it
+
+### Correct Usage Pattern
+
+```javascript
+// 1. Create centralized config file (once)
+// /src/config/analytics.config.js
+export const analyticsConfig = {
+  siteAuth: process.env.NEXT_PUBLIC_DOTCMS_ANALYTICS_SITE_KEY,
+  server: process.env.NEXT_PUBLIC_DOTCMS_ANALYTICS_HOST,
+  autoPageView: true,
+  debug: process.env.NEXT_PUBLIC_DOTCMS_ANALYTICS_DEBUG === "true",
+};
+
+// 2. Add DotContentAnalytics to layout for auto pageview tracking (optional)
+// /src/app/layout.js
+import { DotContentAnalytics } from "@dotcms/analytics/react";
+import { analyticsConfig } from "@/config/analytics.config";
+
+<DotContentAnalytics config={analyticsConfig} />;
+
+// 3. Import config in every component that uses the hook
+// /src/components/MyComponent.js
+import { useContentAnalytics } from "@dotcms/analytics/react";
+import { analyticsConfig } from "@/config/analytics.config";
+
+const { track } = useContentAnalytics(analyticsConfig); // ✅ Config required!
+```
+
+**Why centralize config?** While you must import it in each component, centralizing prevents duplication and makes updates easier.
+
+## Quick Setup Summary
+
+Here's the complete setup flow:
+
+```
+1. Install package
+   └─> npm install @dotcms/analytics
+
+2. Create centralized config file
+   └─> /src/config/analytics.config.js
+   └─> export const analyticsConfig = { siteAuth, server, debug, ... }
+
+3. (Optional) Add DotContentAnalytics for auto pageview tracking
+   └─> /src/app/layout.js
+   └─> import { analyticsConfig } from "@/config/analytics.config"
+   └─> <DotContentAnalytics config={analyticsConfig} />
+
+4. Import config in EVERY component that uses the hook
+   └─> /src/components/MyComponent.js
+   └─> import { analyticsConfig } from "@/config/analytics.config"
+   └─> const { track } = useContentAnalytics(analyticsConfig) // ✅ Config required!
+```
+
+**Key Benefits of Centralized Config**:
+
+- ✅ Single source of truth for configuration values
+- ✅ Easy to update environment variables in one place
+- ✅ Consistent config across all components
+- ✅ Better than duplicating config in every file
+
+## Installation Steps
+
+### 1. Install the Package
+
+Navigate to the Next.js example directory and install the package:
+
+```bash
+cd /core/examples/nextjs
+npm install @dotcms/analytics
+```
+
+### 2. Verify Installation
+
+Check that the package was added to `package.json`:
+
+```bash
+grep "@dotcms/analytics" package.json
+```
+
+Expected output: `"@dotcms/analytics": "latest"` or a similar version.
+
+### 3. Create Centralized Analytics Configuration
+
+Create a dedicated configuration file to centralize your analytics settings. This makes it easier to maintain and reuse across your application.
+
+**File**: `/core/examples/nextjs/src/config/analytics.config.js`
+
+```javascript
+/**
+ * Centralized analytics configuration for dotCMS Content Analytics
+ *
+ * This configuration is used by:
+ * - The DotContentAnalytics component in layout.js
+ * - useContentAnalytics() hook when used standalone (optional)
+ *
+ * Environment variables required:
+ * - NEXT_PUBLIC_DOTCMS_ANALYTICS_SITE_KEY
+ * - NEXT_PUBLIC_DOTCMS_ANALYTICS_HOST
+ * - NEXT_PUBLIC_DOTCMS_ANALYTICS_DEBUG (optional)
+ */
+export const analyticsConfig = {
+  siteAuth: process.env.NEXT_PUBLIC_DOTCMS_ANALYTICS_SITE_KEY,
+  server: process.env.NEXT_PUBLIC_DOTCMS_ANALYTICS_HOST,
+  autoPageView: true, // Automatically track page views on route changes
+  debug: process.env.NEXT_PUBLIC_DOTCMS_ANALYTICS_DEBUG === "true",
+  queue: {
+    eventBatchSize: 15, // Send when 15 events are queued
+    flushInterval: 5000, // Or send every 5 seconds (ms)
+  },
+};
+```
+
+**Benefits of this approach**:
+
+- ✅ Single source of truth for analytics configuration
+- ✅ Easy to import and reuse across components
+- ✅ Centralized environment variable management
+- ✅ Type-safe and IDE autocomplete friendly
+- ✅ Easy to test and mock in unit tests
+
+### 4. Configure Analytics in Next.js Layout
+
+Update the root layout file to include the analytics component using the centralized config.
+
+**File**: `/core/examples/nextjs/src/app/layout.js`
+
+```javascript
+import { Inter } from "next/font/google";
+import "./globals.css";
+
+const inter = Inter({ subsets: ["latin"] });
+
+export default function RootLayout({ children }) {
+  return (
+    <html lang="en">
+      <body className={inter.className}>
+        {children}
+      </body>
+    </html>
+  );
+}
+```
+
+**Updated with Analytics** (using centralized config):
+
+```javascript
+import { Inter } from "next/font/google";
+import { DotContentAnalytics } from "@dotcms/analytics/react";
+import { analyticsConfig } from "@/config/analytics.config";
+import "./globals.css";
+
+const inter = Inter({ subsets: ["latin"] });
+
+export default function RootLayout({ children }) {
+  return (
+    <html lang="en">
+      <body className={inter.className}>
+        <DotContentAnalytics config={analyticsConfig}>
+          {children}
+        </DotContentAnalytics>
+      </body>
+    </html>
+  );
+}
+```
+
+### 5. Add Environment Variables
+
+Create or update the `.env.local` file in the Next.js project root. The variable names must match those read by `analytics.config.js`:
+
+**File**: `/core/examples/nextjs/.env.local`
+
+```bash
+# dotCMS Analytics Configuration
+NEXT_PUBLIC_DOTCMS_ANALYTICS_SITE_KEY=your_site_auth_key_here
+NEXT_PUBLIC_DOTCMS_ANALYTICS_HOST=https://your-dotcms-server.com
+NEXT_PUBLIC_DOTCMS_ANALYTICS_DEBUG=false
+```
+
+**Important**: Replace `your_site_auth_key_here` with your actual dotCMS Analytics site auth key. This can be obtained from the Analytics app in your dotCMS instance.
+
+### 6. Add `.env.local` to `.gitignore`
+
+Ensure the environment file is not committed to version control:
+
+```bash
+# Check if already ignored
+grep ".env.local" /core/examples/nextjs/.gitignore
+
+# If not present, add it
+echo ".env.local" >> /core/examples/nextjs/.gitignore
+```
+
+## Usage Examples
+
+### Basic Setup (Automatic Page Views)
+
+With the configuration above, page views are automatically tracked on every route change. No additional code needed!
+
+### Manual Page View with Custom Data
+
+Track page views with additional context:
+
+```javascript
+"use client";
+
+import { useEffect } from "react";
+import { useContentAnalytics } from "@dotcms/analytics/react";
+import { analyticsConfig } from "@/config/analytics.config";
+
+function MyComponent() {
+  // ✅ ALWAYS pass config - import from centralized config file
+  const { pageView } = useContentAnalytics(analyticsConfig);
+
+  useEffect(() => {
+    // Track page view with custom data
+    pageView({
+      contentType: "blog",
+      category: "technology",
+      author: "john-doe",
+      wordCount: 1500,
+    });
+  }, []);
+
+  return <div>Content here</div>;
+}
+```
+
+### Track Custom Events
+
+Track specific user interactions:
+
+```javascript
+"use client";
+
+import { useContentAnalytics } from "@dotcms/analytics/react";
+import { analyticsConfig } from "@/config/analytics.config";
+
+function CallToActionButton() {
+  // ✅ ALWAYS pass config - import from centralized config file
+  const { track } = useContentAnalytics(analyticsConfig);
+
+  const handleClick = () => {
+    // Track custom event
+    track("cta-click", {
+      button: "Buy Now",
+      location: "hero-section",
+      price: 299.99,
+    });
+  };
+
+  return <button onClick={handleClick}>Buy Now</button>;
+}
+```
+
+### Form Submission Tracking
+
+```javascript
+"use client";
+
+import { useContentAnalytics } from "@dotcms/analytics/react";
+import { analyticsConfig } from "@/config/analytics.config";
+
+function ContactForm() {
+  const { track } = useContentAnalytics(analyticsConfig);
+
+  const handleSubmit = async (e) => {
+    e.preventDefault();
+
+    // Track form submission
+    track("form-submit", {
+      formName: "contact-form",
+      formType: "lead-gen",
+      source: "homepage",
+    });
+
+    // Submit form...
+  };
+
+  return (
+    <form onSubmit={handleSubmit}>
+      {/* Form fields */}
+    </form>
+  );
+}
+```
+
+### Video/Media Interaction Tracking
+
+```javascript
+"use client";
+
+import { useContentAnalytics } from "@dotcms/analytics/react";
+import { analyticsConfig } from "@/config/analytics.config";
+
+function VideoPlayer({ videoId }) {
+  const { track } = useContentAnalytics(analyticsConfig);
+
+  const handlePlay = () => {
+    track("video-play", {
+      videoId,
+      duration: 120,
+      autoplay: false,
+    });
+  };
+
+  const handleComplete = () => {
+    track("video-complete", {
+      videoId,
+      watchPercentage: 100,
+    });
+  };
+
+  return (
+    <video controls onPlay={handlePlay} onEnded={handleComplete} />
+  );
+}
+```
+
+### E-commerce Product View Tracking
+
+```javascript
+"use client";
+
+import { useEffect } from "react";
+import { useContentAnalytics } from "@dotcms/analytics/react";
+import { analyticsConfig } from "@/config/analytics.config";
+
+function ProductPage({ product }) {
+  const { track } = useContentAnalytics(analyticsConfig);
+
+  useEffect(() => {
+    // Track product view
+    track("product-view", {
+      productId: product.sku,
+      productName: product.title,
+      category: product.category,
+      price: product.price,
+      inStock: product.inventory > 0,
+    });
+  }, [product]);
+
+  return (
+    <div>
+      {/* Product details */}
+    </div>
+  );
+}
+```
+
+### Conversion Tracking (E-commerce Purchase)
+
+```javascript
+"use client";
+
+import { useContentAnalytics } from "@dotcms/analytics/react";
+import { analyticsConfig } from "@/config/analytics.config";
+
+function CheckoutButton({ product, quantity }) {
+  const { conversion } = useContentAnalytics(analyticsConfig);
+
+  const handlePurchase = () => {
+    // Process checkout logic here...
+    // After successful payment confirmation:
+
+    // Track conversion ONLY after successful purchase
+    conversion("purchase", {
+      value: product.price * quantity,
+      currency: "USD",
+      productId: product.sku,
+      productName: product.title,
+      quantity: quantity,
+      category: product.category,
+    });
+  };
+
+  return <button onClick={handlePurchase}>Complete Purchase</button>;
+}
+```
+
+### Conversion Tracking (Lead Generation)
+
+```javascript
+"use client";
+
+import { useContentAnalytics } from "@dotcms/analytics/react";
+import { analyticsConfig } from "@/config/analytics.config";
+
+function DownloadWhitepaper() {
+  const { conversion } = useContentAnalytics(analyticsConfig);
+
+  const handleDownload = () => {
+    // Trigger download logic here...
+    // After download is successfully completed:
+
+    // Track conversion ONLY after successful download
+    conversion("download", {
+      fileType: "pdf",
+      fileName: "whitepaper-2024.pdf",
+      category: "lead-magnet",
+    });
+  };
+
+  return (
+    <button onClick={handleDownload}>Download Whitepaper</button>
+  );
+}
+```
+
+## Configuration Options
+
+### Analytics Config Object
+
+| Option | Type | Required | Default | Description |
+| --- | --- | --- | --- | --- |
+| `siteAuth` | `string` | Yes | - | Site authentication key from dotCMS Analytics |
+| `server` | `string` | Yes | - | Your dotCMS server URL |
+| `debug` | `boolean` | No | `false` | Enable verbose logging for debugging |
+| `autoPageView` | `boolean` | No | `true` (React) | Automatically track page views on route changes |
+| `queue` | `QueueConfig \| false` | No | Default queue settings | Event batching configuration |
+| `impressions` | `ImpressionConfig \| boolean` | No | `false` | Content impression tracking (disabled by default) |
+| `clicks` | `boolean` | No | `false` | Content click tracking with 300ms throttle (disabled by default) |
+
+### Queue Configuration
+
+Controls how events are batched and sent:
+
+| Option | Type | Default | Description |
+| --- | --- | --- | --- |
+| `eventBatchSize` | `number` | `15` | Max events per batch - auto-sends when reached |
+| `flushInterval` | `number` | `5000` | Time in ms between flushes |
+
+**Disable Queuing** (send immediately):
+
+```javascript
+const analyticsConfig = {
+  siteAuth: "your_key",
+  server: "https://your-server.com",
+  queue: false, // Send events immediately
+};
+```
+
+### Impression Tracking Configuration
+
+Controls automatic tracking of content visibility:
+
+| Option | Type | Default | Description |
+| --- | --- | --- | --- |
+| `visibilityThreshold` | `number` | `0.5` | Min percentage visible (0.0 to 1.0) |
+| `dwellMs` | `number` | `750` | Min time visible in milliseconds |
+| `maxNodes` | `number` | `1000` | Max elements to track (performance limit) |
+
+**Enable with defaults:**
+
+```javascript
+const analyticsConfig = {
+  siteAuth: "your_key",
+  server: "https://your-server.com",
+  impressions: true, // 50% visible, 750ms dwell, 1000 max nodes
+};
+```
+
+**Custom thresholds:**
+
+```javascript
+const analyticsConfig = {
+  siteAuth: "your_key",
+  server: "https://your-server.com",
+  impressions: {
+    visibilityThreshold: 0.7, // Require 70% visible
+    dwellMs: 1000, // Must be visible for 1 second
+    maxNodes: 500, // Track max 500 elements
+  },
+};
+```
+
+**How it works:**
+
+- ✅ Tracks contentlets marked with `dotcms-analytics-contentlet` class and `data-dot-analytics-*` attributes
+- ✅ Uses Intersection Observer API for high performance and battery efficiency
+- ✅ Only fires when element is ≥50% visible for ≥750ms (configurable)
+- ✅ Only tracks during active tab (respects page visibility)
+- ✅ One impression per contentlet per session (no duplicates)
+- ✅ Automatically disabled in dotCMS editor mode
+
+### Click Tracking Configuration
+
+Controls automatic tracking of user interactions with content elements.
+
+**Enable click tracking:**
+
+```javascript
+const analyticsConfig = {
+  siteAuth: "your_key",
+  server: "https://your-server.com",
+  clicks: true, // Enable with 300ms throttle (fixed)
+};
+```
+
+**How it works:**
+
+- ✅ Tracks clicks on `<a>` and `<button>` elements
+- ✅ Throttled to one event per element every 300ms (fixed)
+
+**Complete Configuration Example:**
+
+```javascript
+// /config/analytics.config.js
+export const analyticsConfig = {
+  siteAuth: process.env.NEXT_PUBLIC_DOTCMS_ANALYTICS_SITE_KEY,
+  server: process.env.NEXT_PUBLIC_DOTCMS_ANALYTICS_HOST,
+  autoPageView: true,
+  debug: process.env.NEXT_PUBLIC_DOTCMS_ANALYTICS_DEBUG === "true",
+  queue: {
+    eventBatchSize: 15,
+    flushInterval: 5000,
+  },
+  impressions: {
+    visibilityThreshold: 0.5, // 50% visible
+    dwellMs: 750, // 750ms dwell time
+    maxNodes: 1000, // Track up to 1000 elements
+  },
+  clicks: true, // Enable click tracking (300ms throttle, fixed)
+};
+```
+
+## Data Captured Automatically
+
+The SDK automatically enriches events with:
+
+### Page View Events
+
+- **Page Data**: URL, title, referrer, path, protocol, search params, hash
+- **Device Data**: Screen resolution, viewport size, language, user agent
+- **UTM Parameters**: Campaign tracking (source, medium, campaign, term, content)
+- **Context**: Site key, session ID, user ID, timestamp
+
+### Custom Events
+
+- **Context**: Site key, session ID, user ID
+- **Device Data**: Screen resolution, language, viewport dimensions
+- **Custom Properties**: Any data you pass to `track()`
+
+## Session Management
+
+- **Duration**: 30-minute inactivity timeout
+- **Reset Conditions**:
+  - At midnight UTC
+  - When UTM campaign changes
+- **Storage**: Uses `dot_analytics_session_id` in localStorage
+
+## Identity Tracking
+
+- **Anonymous User ID**: Persisted across sessions
+- **Storage Key**: `dot_analytics_user_id`
+- **Behavior**: Generated automatically on first visit, reused on subsequent visits
+
+## Testing & Debugging
+
+### Enable Debug Mode
+
+Set `debug: true` in config to see verbose logging:
+
+```javascript
+const analyticsConfig = {
+  siteAuth: "your_key",
+  server: "https://your-server.com",
+  debug: true, // Enable debug logging
+};
+```
+
+### Verify Events in Network Tab
+
+1. Open browser DevTools → Network tab
+2. Filter by: `/api/v1/analytics/content/event`
+3. Perform actions in your app
+4. Check request payloads to see captured data
+
+### Check Storage
+
+Open browser DevTools → Application → Local Storage:
+
+- `dot_analytics_user_id` - Anonymous user identifier
+- `dot_analytics_session_id` - Current session ID
+- `dot_analytics_session_utm` - UTM campaign data
+- `dot_analytics_session_start` - Session start timestamp
+
+## Troubleshooting
+
+### Events Not Appearing
+
+1. **Verify Configuration**:
+
+   - Check `siteAuth` and `server` are correct
+   - Enable `debug: true` to see console logs
+
+2. **Check Network Requests**:
+
+   - Look for requests to `/api/v1/analytics/content/event`
+   - Verify they're returning 200 status
+
+3. **Editor Mode Detection**:
+
+   - Analytics are automatically disabled inside the dotCMS editor
+   - Test in preview or published mode
+
+4. **Environment Variables**:
+   - Ensure `.env.local` is loaded (restart dev server if needed)
+   - Verify variable names start with `NEXT_PUBLIC_`
+
+### Queue Not Flushing
+
+- Check `eventBatchSize` - it might not be reaching the threshold
+- Verify `flushInterval` is appropriate for your use case
+- Events auto-flush on page navigation/close via `visibilitychange`
+
+### Session Not Persisting
+
+- Check localStorage is enabled in the browser
+- Verify no browser extensions are blocking storage
+- Check the console for storage-related errors
+
+### Config File Issues
+
+1. **Import Path Not Found**:
+
+   ```javascript
+   // ❌ Error: Cannot find module '@/config/analytics.config'
+   ```
+
+   - Verify the file exists at `/src/config/analytics.config.js`
+   - Check your `jsconfig.json` or `tsconfig.json` has the `@` alias configured:
+     ```json
+     {
+       "compilerOptions": {
+         "paths": {
+           "@/*": ["./src/*"]
+         }
+       }
+     }
+     ```
+
+2. **Undefined Config Values**:
+
+   ```javascript
+   // Config shows undefined for siteAuth or server
+   ```
+
+   - Verify environment variables are set in `.env.local`
+   - Restart the dev server after changing `.env.local`
+   - Check variable names start with `NEXT_PUBLIC_`
+
+3. **Config Not Updated**:
+   - Clear Next.js cache: `rm -rf .next`
+   - Restart dev server: `npm run dev`
+
+## Integration with Existing Next.js Example
+
+The Next.js example at `/core/examples/nextjs` already uses other dotCMS SDK packages:
+
+- `@dotcms/client` - Core API client
+- `@dotcms/experiments` - A/B testing
+- `@dotcms/react` - React components
+- `@dotcms/types` - TypeScript types
+- `@dotcms/uve` - Universal Visual Editor
+
+Adding analytics complements these by providing:
+
+- Usage tracking across all content types
+- User behavior insights
+- Campaign performance metrics
+- Content engagement analytics
+
+## API Reference
+
+### Component: `DotContentAnalytics`
+
+```typescript
+interface AnalyticsConfig {
+  siteAuth: string;
+  server: string;
+  debug?: boolean;
+  autoPageView?: boolean;
+  queue?: QueueConfig | false;
+}
+
+interface QueueConfig {
+  eventBatchSize?: number;
+  flushInterval?: number;
+}
+
+<DotContentAnalytics config={analyticsConfig} />;
+```
+
+### Hook: `useContentAnalytics`
+
+```typescript
+interface ContentAnalyticsHook {
+  pageView: (customData?: Record<string, unknown>) => void;
+  track: (eventName: string, properties?: Record<string, unknown>) => void;
+  conversion: (name: string, options?: Record<string, unknown>) => void;
+}
+
+// ✅ CORRECT: Always pass config - import from centralized config file
+import { analyticsConfig } from "@/config/analytics.config";
+const { pageView, track, conversion } = useContentAnalytics(analyticsConfig);
+```
+
+**CRITICAL**: The hook **ALWAYS requires config as a parameter**. There is no provider pattern for the hook - `<DotContentAnalytics />` is only for auto pageview tracking and does NOT provide context to child components.
+
+**Always import and pass the centralized config** from `/config/analytics.config.js` to ensure consistency.
+
+### Methods
+
+#### `pageView(customData?)`
+
+Track a page view with optional custom data. Automatically captures page, device, UTM, and context data.
+
+**Parameters**:
+
+- `customData` (optional): Object with custom properties to attach
+
+**Example**:
+
+```javascript
+pageView({
+  contentType: "product",
+  category: "electronics",
+});
+```
+
+#### `track(eventName, properties?)`
+
+Track a custom event with optional properties.
+
+**Parameters**:
+
+- `eventName` (required): String identifier for the event (cannot be "pageview" or "conversion")
+- `properties` (optional): Object with event-specific data
+
+**Example**:
+
+```javascript
+track("button-click", {
+  label: "Subscribe",
+  location: "sidebar",
+});
+```
+
+#### `conversion(name, options?)`
+
+Track a conversion event (purchase, download, sign-up, etc.) with optional metadata.
+
+**⚠️ IMPORTANT: Conversion events are business events that should only be tracked after a successful action or completed goal.** Tracking conversions on clicks or attempts (before success) diminishes their value as conversion metrics. Only track conversions when:
+
+- ✅ Purchase is completed and payment is confirmed
+- ✅ Download is successfully completed
+- ✅ Sign-up form is submitted and account is created
+- ✅ Form submission is successful and data is saved
+- ✅ Any business goal is actually achieved
+
+**Parameters**:
+
+- `name` (required): String identifier for the conversion (e.g., "purchase", "download", "signup")
+- `options` (optional): Object with conversion metadata (all properties go into the `custom` object)
+
+**Examples**:
+
+```javascript
+// Basic conversion (after successful download)
+conversion("download");
+
+// Conversion with custom metadata (after successful purchase)
+conversion("purchase", {
+  value: 99.99,
+  currency: "USD",
+  productId: "SKU-12345",
+});
+
+// Conversion with additional context (after successful signup)
+conversion("signup", {
+  source: "homepage",
+  plan: "premium",
+});
+```
+
+## Best Practices
+
+1. **Centralize Configuration**: Create a dedicated config file (`/config/analytics.config.js`) for all analytics settings
+
+   ```javascript
+   // ✅ GOOD: Centralized config file
+   // /config/analytics.config.js
+   export const analyticsConfig = {
+     siteAuth: process.env.NEXT_PUBLIC_DOTCMS_ANALYTICS_SITE_KEY,
+     server: process.env.NEXT_PUBLIC_DOTCMS_ANALYTICS_HOST,
+     debug: process.env.NEXT_PUBLIC_DOTCMS_ANALYTICS_DEBUG === "true",
+     autoPageView: true,
+   };
+
+   // ❌ BAD: Inline config in multiple files
+   // component1.js
+   const config = { siteAuth: "...", server: "..." };
+   // component2.js
+   const config = { siteAuth: "...", server: "..." }; // Duplicate!
+   ```
+
+2. **Always Import and Pass Config**: The hook requires config as a parameter
+
+   ```javascript
+   // ✅ CORRECT: Import centralized config in every component
+   // MyComponent.js
+   import { analyticsConfig } from "@/config/analytics.config";
+   const { track } = useContentAnalytics(analyticsConfig);
+
+   // ❌ WRONG: Inline config duplication
+   // MyComponent.js
+   const { track } = useContentAnalytics({
+     siteAuth: "...", // Duplicated!
+     server: "...", // Duplicated!
+   });
+   ```
+
+3. **Use DotContentAnalytics for Auto PageViews**: Add to the layout for automatic tracking
+
+   ```javascript
+   // layout.js - For automatic pageview tracking only
+   import { analyticsConfig } from "@/config/analytics.config";
+   <DotContentAnalytics config={analyticsConfig} />;
+   ```
+
+4. **Environment Variables**: Always use environment variables for sensitive config (siteAuth)
+
+5. **Event Naming**: Use consistent, descriptive event names (e.g., `cta-click`, not just `click`)
+
+6. **Custom Data**: Include relevant context in event properties
+
+7. **Queue Configuration**: Use default queue settings unless you have specific performance needs
+
+8. **Debug Mode**: Enable only in development, disable in production
+
+9. **Auto Page Views**: Keep enabled for SPAs (Next.js) to track route changes
+
+## Related Resources
+
+- Analytics SDK README: `/core/core-web/libs/sdk/analytics/README.md`
+- Package Location: `/core/core-web/libs/sdk/analytics/`
+- Next.js Example: `/core/examples/nextjs/`
+
+## Quick Command Reference
+
+```bash
+# Install package
+cd /core/examples/nextjs
+npm install @dotcms/analytics
+
+# Start Next.js dev server
+npm run dev
+
+# Build for production
+npm run build
+
+# Start production server
+npm run start
+
+# Verify installation
+npm list @dotcms/analytics
+```
diff --git a/.cursor/rules/typescript-context.md b/.cursor/rules/typescript-context.md
index 5060ace22d9e..c849370bb049 100644
--- a/.cursor/rules/typescript-context.md
+++ b/.cursor/rules/typescript-context.md
@@ -1,66 +1,172 @@
 ---
 description: Angular frontend development context - loads only for Angular files
-globs: ["core-web/**/*.ts", "core-web/**/*.html", "core-web/**/*.scss"]
+globs: ["core-web/**/*.{ts,html,scss,css}"]
 alwaysApply: false
 ---
 
 # Angular Frontend Context
 
-## Immediate Patterns (Copy-Paste Ready)
+This project adheres to modern Angular best practices, emphasizing maintainability, performance, accessibility, and scalability.
+
+## TypeScript Best Practices
+
+* **Strict Type Checking:** Always enable and adhere to strict type checking. This helps catch errors early and improves code quality.
+* **Prefer Type Inference:** Allow TypeScript to infer types when they are obvious from the context. This reduces verbosity while maintaining type safety.
+    * **Bad:**
+    ```typescript
+    let name: string = 'Angular';
+    ```
+    * **Good:**
+    ```typescript
+    let name = 'Angular';
+    ```
+* **Avoid `any`:** Do not use the `any` type unless absolutely necessary as it bypasses type checking. Prefer `unknown` when a type is uncertain and you need to handle it safely.
+* **Don't use enums; use `as const` objects instead.** Example:
+    ```typescript
+    const MyEnum = {
+        VALUE1: 'value1',
+        VALUE2: 'value2',
+    } as const;
+    ```
+* **Private properties:** Use the `#` prefix to indicate that a property is private, example: `#myPrivateProperty`.
+    * **Bad:**
+    ```typescript
+    private myPrivateProperty = 'private';
+    ```
+    * **Good:**
+    ```typescript
+    #myPrivateProperty = 'private';
+    ```
+
+## Angular Best Practices
+
+* **Standalone Components:** Always use standalone components, directives, and pipes. Avoid using `NgModules` for new features or refactoring existing ones.
+* **Implicit Standalone:** When creating standalone components, you do not need to explicitly set `standalone: true` inside the `@Component`, `@Directive` and `@Pipe` decorators, as it is implied by default.
+    * **Bad:**
+    ```typescript
+    @Component({
+        standalone: true,
+        // ...
+    })
+    export class MyComponent {}
+    ```
+    * **Good:**
+    ```typescript
+    @Component({
+        // `standalone: true` is implied
+        // ...
+    })
+    export class MyComponent {}
+    ```
+* **Signals for State Management:** Utilize Angular Signals for reactive state management within components and services.
+* **Lazy Loading:** Implement lazy loading for feature routes to improve initial load times of your application.
+* **NgOptimizedImage:** Use `NgOptimizedImage` for all static images to automatically optimize image loading and performance.
+* **Host bindings:** Do NOT use the `@HostBinding` and `@HostListener` decorators. Put host bindings inside the `host` object of the `@Component` or `@Directive` decorator instead.
+
+## Components
+
+* **Single Responsibility:** Keep components small, focused, and responsible for a single piece of functionality.
+* **`input()` and `output()` Functions:** Prefer `input()` and `output()` functions over the `@Input()` and `@Output()` decorators for defining component inputs and outputs.
+    * **Old Decorator Syntax:**
+    ```typescript
+    @Input() userId!: string;
+    @Output() userSelected = new EventEmitter<string>();
+    ```
+    * **New Function Syntax:**
+    ```typescript
+    import { input, output } from '@angular/core';
+
+    // ...
+    $userId = input<string>('');
+    $userSelected = output<string>();
+    ```
+* **`computed()` for Derived State:** Use the `computed()` function from `@angular/core` for derived state based on signals.
+* **`ChangeDetectionStrategy.OnPush`:** Always set `changeDetection: ChangeDetectionStrategy.OnPush` in the `@Component` decorator for performance benefits by reducing unnecessary change detection cycles.
+* **Reactive Forms:** Prefer Reactive forms over Template-driven forms for complex forms, validation, and dynamic controls due to their explicit, immutable, and synchronous nature.
+* **No `ngClass` / `NgClass`:** Do not use the `ngClass` directive. Instead, use native `class` bindings for conditional styling.
+    * **Bad:**
+    ```html
+    <div [ngClass]="{'active': isActive, 'disabled': isDisabled}">
+    ```
+    * **Good:**
+    ```html
+    <div [class.active]="isActive">
+    <div [class.disabled]="isDisabled">
+    <div [class]="classExpression">
+    ```
+* **No `ngStyle` / `NgStyle`:** Do not use the `ngStyle` directive. Instead, use native `style` bindings for conditional inline styles.
+    * **Bad:**
+    ```html
+    <div [ngStyle]="{'font-size': fontSize + 'px'}">
+    ```
+    * **Good:**
+    ```html
+    <div [style.font-size.px]="fontSize">
+    <div [style]="styleExpression">
+    ```
+* **File Structure:** Follow the file structure below for components.
+    * component-name/
+        * component-name.component.ts # Logic
+        * component-name.component.html # Template
+        * component-name.component.scss # Styles
+        * component-name.component.spec.ts # Tests
+* **For signals**, use the `$` prefix to indicate that it is a signal, example: `$mySignal`
+* **For observables**, use the `$` suffix to indicate that it is an observable, example: `myObservable$`
+
+## State Management
+
+* **Signals for Local State:** Use signals for managing local component state.
+* **`computed()` for Derived State:** Leverage `computed()` for any state that can be derived from other signals.
+* **Pure and Predictable Transformations:** Ensure state transformations are pure functions (no side effects) and predictable.
+* **Signal value updates:** Do NOT use `mutate` on signals, use `update` or `set` instead.
+* **Signal Store:** For complex state management, use the Signal Store pattern; learn more at https://ngrx.io/guide/signals
+
+## Templates
+
+* **Simple Templates:** Keep templates as simple as possible, avoiding complex logic directly in the template. Delegate complex logic to the component's TypeScript code.
+* **Native Control Flow:** Use the new built-in control flow syntax (`@if`, `@for`, `@switch`) instead of the older structural directives (`*ngIf`, `*ngFor`, `*ngSwitch`).
+    * **Old Syntax:**
+    ```html
+    <div *ngIf="isVisible">Content</div>
+    <div *ngFor="let item of items">{{ item }}</div>
+    ```
+    * **New Syntax:**
+    ```html
+    @if (isVisible) {
+      <div>Content</div>
+    }
+    @for (item of items; track item.id) {
+      <div>{{ item }}</div>
+ } + ``` +* **Async Pipe:** Use the `async` pipe to handle observables in templates. This automatically subscribes and unsubscribes, preventing memory leaks. + +## Services + +* **Single Responsibility:** Design services around a single, well-defined responsibility. +* **`providedIn: 'root'`:** Use the `providedIn: 'root'` option when declaring injectable services to ensure they are singletons and tree-shakable. +* **`inject()` Function:** Prefer the `inject()` function over constructor injection when injecting dependencies, especially within `provide` functions, `computed` properties, or outside of constructor context. + * **Old Constructor Injection:** + ```typescript + constructor(private myService: MyService) {} + ``` + * **New `inject()` Function:** + ```typescript + import { inject } from '@angular/core'; + + export class MyComponent { + private myService = inject(MyService); + // ... + } + ``` -### Modern Template Syntax (REQUIRED) -```html - -@if (isLoading()) { - -} @else { - -} +### Testing Patterns (CRITICAL) - -@for (item of items(); track item.id) { -
{{item.name}}
-} @empty { - -} +Always use Spectator with jest or Vitest for testing using @ngneat/spectator/jest package. - -@switch (status()) { - @case ('loading') { } - @case ('error') { } - @default { } -} -``` - -### Component Structure (REQUIRED) ```typescript -@Component({ - selector: 'dot-my-component', - standalone: true, // REQUIRED - imports: [CommonModule], - templateUrl: './my-component.html', - styleUrls: ['./my-component.scss'], // Note: plural - changeDetection: ChangeDetectionStrategy.OnPush -}) -export class MyComponent { - // Input/Output signals (REQUIRED) - data = input(); // NOT @Input() - config = input(); - change = output(); // NOT @Output() - - // State signals - loading = signal(false); - - // Computed signals - isValid = computed(() => this.data() && this.loading()); - - // Dependency injection - private service = inject(MyService); -} -``` +import { createComponentFactory, Spectator, byTestId, mockProvider } from '@ngneat/spectator/jest'; -### Testing Patterns (CRITICAL) -```typescript // Spectator setup const createComponent = createComponentFactory({ component: MyComponent, @@ -104,15 +210,6 @@ spectator.typeInElement('test', byTestId('name-input')); .feature-list__item--active { } ``` -### File Structure (REQUIRED) -``` -component-name/ -├── component-name.component.ts # Logic -├── component-name.component.html # Template -├── component-name.component.scss # Styles -└── component-name.component.spec.ts # Tests -``` - ## Build Commands ```bash # Development server @@ -126,10 +223,10 @@ cd core-web && yarn install # NOT npm install ``` ## Tech Stack -- **Angular**: 18.2.3 standalone components +- **Angular**: 20.3.9 standalone components - **UI**: PrimeNG 17.18.11, PrimeFlex 3.3.1 - **State**: NgRx Signals, Component Store -- **Build**: Nx 19.6.5 +- **Build**: Nx 20.5.1 - **Testing**: Jest + Spectator (REQUIRED) ## On-Demand Documentation diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 000000000000..fecad5d1b9a6 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,193 @@ +# Copilot Coding Agent Instructions for dotCMS Core + +## Repository Overview +dotCMS is a **Universal Content Management System** - a large-scale enterprise CMS built with Java (backend) and Angular (frontend). The repository is a Maven multi-module project with an Nx monorepo for frontend code. + +**Tech Stack:** +- **Backend**: Java 21 runtime (Java 11 syntax for core), Maven, JAX-RS REST APIs +- **Frontend**: Angular 19+, TypeScript, Nx workspace, PrimeNG +- **Infrastructure**: Docker, PostgreSQL, Elasticsearch + +## Build Commands (Validated & Essential) + +### Quick Reference +```bash +# FASTEST build for simple backend changes (~2-3 min) +./mvnw install -pl :dotcms-core -DskipTests + +# Full build without Docker (~5-8 min) +./mvnw clean install -DskipTests -Ddocker.skip + +# Full build with Docker image (~8-15 min) +./mvnw clean install -DskipTests +``` + +### Testing Commands +**⚠️ CRITICAL: Never run full integration suite (60+ min). 
Always target specific tests:** +```bash +# Specific integration test class (~2-10 min) +./mvnw verify -pl :dotcms-integration -Dcoreit.test.skip=false -Dit.test=ContentTypeAPIImplTest + +# Specific test method +./mvnw verify -pl :dotcms-integration -Dcoreit.test.skip=false -Dit.test=MyTest#testMethod + +# JVM unit tests only +./mvnw test -pl :dotcms-core + +# Postman API tests (specific collection) +./mvnw verify -pl :dotcms-postman -Dpostman.test.skip=false -Dpostman.collections=ai +``` + +### Frontend Commands +```bash +cd core-web +yarn install # Install dependencies +nx run dotcms-ui:serve # Development server +nx run dotcms-ui:test # Run tests +nx run dotcms-ui:lint # Lint code +nx affected -t test # Test affected projects +``` + +## Project Structure + +``` +core/ +├── dotCMS/ # Main backend Java code +│ └── src/main/java/com/ # Java source files +├── core-web/ # Frontend (Angular/Nx monorepo) +│ ├── apps/dotcms-ui/ # Main admin UI +│ └── libs/ # Shared libraries and SDKs +├── dotcms-integration/ # Integration tests +├── dotcms-postman/ # Postman API tests +├── bom/application/pom.xml # Dependency versions (ADD versions here) +├── parent/pom.xml # Plugin management +└── .github/workflows/ # CI/CD pipelines +``` + +## Critical Patterns (Always Follow) + +### Maven Dependency Management +**ALWAYS add dependency versions to `bom/application/pom.xml`, NEVER to module POMs:** +```xml + + + 1.2.3 + + + + + com.example + my-library + ${my-library.version} + + + +``` + +### Java Coding Patterns +```java +// Configuration - ALWAYS use Config class +import com.dotmarketing.util.Config; +String value = Config.getStringProperty("key", "default"); + +// Logging - ALWAYS use Logger class +import com.dotmarketing.util.Logger; +Logger.info(this, "message"); + +// Services - ALWAYS use APILocator +import com.dotcms.api.system.APILocator; +ContentletAPI contentletAPI = APILocator.getContentletAPI(); + +// Null checking - ALWAYS use UtilMethods +import com.dotmarketing.util.UtilMethods; +if (UtilMethods.isSet(myString)) { } +``` + +### REST API Patterns +```java +@Path("/v1/resource") +@Tag(name = "Resource", description = "Resource operations") +public class ResourceEndpoint { + private final WebResource webResource = new WebResource(); + + @GET @Path("/{id}") + @Operation(summary = "Get by ID") + @ApiResponse(responseCode = "200", content = @Content( + schema = @Schema(implementation = ResponseEntityResourceView.class))) + @Produces(MediaType.APPLICATION_JSON) + public Response getById(@Context HttpServletRequest request, + @Context HttpServletResponse response, @PathParam("id") String id) { + InitDataObject initData = webResource.init(request, response, true); + // Business logic + } +} +``` + +### Angular/Frontend Patterns +```typescript +// Modern control flow (REQUIRED) +@if (condition()) { } +@for (item of items(); track item.id) { } + +// Modern inputs/outputs (REQUIRED) +data = input(); +onChange = output(); + +// Testing - use data-testid + +spectator.setInput('prop', value); // ALWAYS use setInput +``` + +## CI/CD and Validation + +### What Triggers CI +Changes to these paths trigger builds (from `.github/filters.yaml`): +- **Backend**: `dotCMS/**`, `bom/**`, `parent/**`, `pom.xml`, `dotcms-integration/**` +- **Frontend**: `core-web/**` +- **CLI**: `tools/dotcms-cli/**` + +### Required Test Flags +Tests are skipped by default. 
Enable with explicit flags:
+```bash
+-Dcoreit.test.skip=false   # Integration tests
+-Dpostman.test.skip=false  # Postman tests
+-Dkarate.test.skip=false   # Karate tests
+```
+
+### Validation Checklist
+Before committing:
+1. Run relevant tests for changed code
+2. Check no hardcoded secrets or sensitive data
+3. Verify dependency versions are in `bom/application/pom.xml`
+4. For REST endpoints: include Swagger/OpenAPI annotations
+
+## Key Files Reference
+
+| Purpose | Location |
+|---------|----------|
+| Backend source | `dotCMS/src/main/java/com/dotcms/` |
+| Frontend source | `core-web/apps/dotcms-ui/`, `core-web/libs/` |
+| Dependency versions | `bom/application/pom.xml` |
+| Plugin versions | `parent/pom.xml` |
+| Integration tests | `dotcms-integration/src/test/java/` |
+| CI workflows | `.github/workflows/cicd_*.yml` |
+| Change detection | `.github/filters.yaml` |
+
+## Common Issues and Solutions
+
+| Issue | Solution |
+|-------|----------|
+| Build fails with Java version | Requires Java 21. Set with SDKMAN: `sdk env install` |
+| Tests skipped silently | Add the matching skip-override flag, e.g. `-Dcoreit.test.skip=false` |
+| Frontend build fails | Run `yarn install` first; requires Node 22.15+ |
+| Dependency version conflict | Check `bom/application/pom.xml`, run `./mvnw dependency:tree` |
+| Docker build fails | Use `-Ddocker.skip` for non-Docker builds |
+
+## Environment Requirements
+- **Java**: 21.0.8+ (via SDKMAN with `.sdkmanrc`)
+- **Node.js**: 22.15.0+ (via NVM with `.nvmrc`)
+- **Maven**: 3.9+ (wrapper included: `./mvnw`)
+- **Docker**: Required for integration tests
+
+---
+**Trust these instructions.** Only search the codebase if information here is incomplete or incorrect.
diff --git a/.github/frontend.instructions.md b/.github/frontend.instructions.md
new file mode 100644
index 000000000000..a2dc5a2b46e6
--- /dev/null
+++ b/.github/frontend.instructions.md
@@ -0,0 +1,142 @@
+---
+description: Frontend development instructions
+applyTo: "core-web/**/*.{ts,html,scss,css}"
+---
+
+# Persona
+
+You are a dedicated Angular developer who thrives on leveraging the absolute latest features of the framework to build cutting-edge applications. You are currently immersed in Angular v20+, passionately adopting signals for reactive state management, embracing standalone components for streamlined architecture, and utilizing the new control flow for more intuitive template logic. Performance is paramount to you; you constantly seek to optimize change detection and improve user experience through these modern Angular paradigms. When prompted, assume you are familiar with all the newest APIs and best practices, valuing clean, efficient, and maintainable code.
+
+## Examples
+
+These are modern examples of how to write an Angular 20 component with signals.
+
+```ts
+import { ChangeDetectionStrategy, Component, signal } from '@angular/core';
+
+@Component({
+  selector: '{{tag-name}}-root',
+  templateUrl: '{{tag-name}}.html',
+  changeDetection: ChangeDetectionStrategy.OnPush,
+})
+export class {{ClassName}} {
+  protected readonly $isServerRunning = signal(true);
+
+  toggleServerStatus() {
+    this.$isServerRunning.update(isServerRunning => !isServerRunning);
+  }
+}
+```
+
+```css
+.container {
+  display: flex;
+  flex-direction: column;
+  align-items: center;
+  justify-content: center;
+  height: 100vh;
+
+  button {
+    margin-top: 10px;
+  }
+}
+```
+
+```html
+
+  @if ($isServerRunning()) {
+    Yes, the server is running
+  } @else {
+    No, the server is not running
+  }
+
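+  <!-- A minimal sketch (not part of the original example): the @for control
+       flow with its required track expression. `$servers` is a hypothetical
+       signal holding a list of server names. -->
+  @for (server of $servers(); track server) {
+    {{ server }}
+  } @empty {
+    No servers configured
+  }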
+```
+
+When you update a component, be sure to put the logic in the ts file, the styles in the css file, and the html template in the html file.
+
+## Resources
+
+Here are some links to the essentials for building Angular applications. Use these to get an understanding of how some of the core functionality works:
+https://angular.dev/essentials/components
+https://angular.dev/essentials/signals
+https://angular.dev/essentials/templates
+https://angular.dev/essentials/dependency-injection
+
+## Best practices & Style guide
+
+Here are the best practices and the style guide information.
+
+### Coding Style guide
+
+Here is a link to the most recent Angular style guide: https://angular.dev/style-guide
+
+### TypeScript Best Practices
+
+- Use strict type checking
+- Prefer type inference when the type is obvious
+- Avoid the `any` type; use `unknown` when the type is uncertain
+- Do not use enums; use `as const` instead.
+- Use the `#` prefix to indicate that a property is private, example: `#myPrivateProperty`.
+
+### Angular Best Practices
+
+- Always use standalone components over `NgModules`
+- Do NOT set `standalone: true` inside the `@Component`, `@Directive` and `@Pipe` decorators
+- Use signals for state management
+- Implement lazy loading for feature routes
+- Use `NgOptimizedImage` for all static images.
+- Do NOT use the `@HostBinding` and `@HostListener` decorators. Put host bindings inside the `host` object of the `@Component` or `@Directive` decorator instead
+- For signals, use the `$` prefix to indicate that it is a signal, example: `$mySignal`
+- For observables, use the `$` suffix to indicate that it is an observable, example: `myObservable$`
+
+### Components
+
+- Keep components small and focused on a single responsibility
+- Use the `input()` signal instead of decorators; learn more here: https://angular.dev/guide/components/inputs
+- Use the `output()` function instead of decorators; learn more here: https://angular.dev/guide/components/outputs
+- Use `computed()` for derived state (see the sketch after this list); learn more about signals here: https://angular.dev/guide/signals
+- Set `changeDetection: ChangeDetectionStrategy.OnPush` in the `@Component` decorator
+- Prefer inline templates for small components
+- Prefer Reactive forms instead of Template-driven ones
+- Do NOT use `ngClass`; use `class` bindings instead, for context: https://angular.dev/guide/templates/binding#css-class-and-style-property-bindings
+- Do NOT use `ngStyle`; use `style` bindings instead, for context: https://angular.dev/guide/templates/binding#css-class-and-style-property-bindings
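+
+As a quick illustration of the bullets above, here is a minimal sketch combining `input()`, `output()`, `computed()`, and `OnPush` (the component and selector names are hypothetical, not from this codebase):
+
+```ts
+import { ChangeDetectionStrategy, Component, computed, input, output } from '@angular/core';
+
+@Component({
+  selector: 'app-price-tag',
+  template: `<button (click)="selected.emit()">{{ $label() }}</button>`,
+  changeDetection: ChangeDetectionStrategy.OnPush,
+})
+export class PriceTagComponent {
+  // Signal-based input instead of the @Input() decorator
+  price = input(0);
+
+  // Derived state via computed(); the $ prefix marks it as a signal
+  protected readonly $label = computed(() => `$${this.price()}`);
+
+  // Signal-based output instead of the @Output() decorator
+  selected = output<void>();
+}
+```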
+
+### State Management
+
+- Use signals for local component state
+- Use `computed()` for derived state
+- Keep state transformations pure and predictable
+- Do NOT use `mutate` on signals; use `update` or `set` instead
+- For complex state management, use the Signal Store pattern; learn more here: https://ngrx.io/guide/signals
+
+### Templates
+
+- Keep templates simple and avoid complex logic
+- Use native control flow (`@if`, `@for`, `@switch`) instead of `*ngIf`, `*ngFor`, `*ngSwitch`
+- Use the async pipe to handle observables
+- Use built-in pipes, and import pipes when they are used in a template; learn more here: https://angular.dev/guide/templates/pipes
+
+### Services
+
+- Design services around a single responsibility
+- Use the `providedIn: 'root'` option for singleton services
+- Use the `inject()` function instead of constructor injection
+
+### Testing
+
+- Always use Spectator with Jest or Vitest for testing, using the `@ngneat/spectator` package.
+- Use the `createComponentFactory` function to create a component factory.
+- Use the `createDirectiveFactory` function to create a directive factory.
+- Use the `createPipeFactory` function to create a pipe factory.
+- Use the `createServiceFactory` function to create a service factory.
+- Use the `createHostFactory` function to create a host factory.
+- Use the `createRoutingFactory` function to create a routing factory.
+- Use the `createHttpFactory` function to create an HTTP factory.
+- Use the `Spectator` class to create a spectator instance.
+- Use the `byTestId` function to select a component by its test id.
+- Use the `mockProvider` function to mock a service.
+- Use the `detectChanges` function to trigger change detection.
+- Use the `setInput` function to set an input value.
+- Use the `click` function to click an element.
\ No newline at end of file
diff --git a/.github/workflows/README.md b/.github/workflows/README.md
index 1fda922218be..e0d531646913 100644
--- a/.github/workflows/README.md
+++ b/.github/workflows/README.md
@@ -1,211 +1,359 @@
-# dotCMS CI/CD Process Overview
+# dotCMS CI/CD Workflows - Getting Started
-This document provides an overview of the CI/CD process for dotCMS, explaining the structure of our workflows, the use of reusable components, and how we optimize our pipeline for efficiency and parallelism.
+Welcome to the dotCMS CI/CD documentation! This guide will help you understand and work with our GitHub Actions workflows.
-## Table of Contents
+## 📚 Documentation Index
-1. [File Structure](#file-structure)
-2. [Important info and Best Practices](#important-info-and-best-practices)
-3. [Overall Structure](#overall-structure)
-4. [Top-Level Workflows](#top-level-workflows)
-5. [Reusable Workflow Phases](#reusable-workflow-phases)
-6. [Custom Actions](#custom-actions)
-7. [Caching and Artifacts](#caching-and-artifacts)
-8. [Parallel Execution](#parallel-execution)
-9. [PR Verification Process](#pr-verification-process)
-10. 
[Benefits of Our Approach](#benefits-of-our-approach) +- **[WORKFLOW_ARCHITECTURE.md](WORKFLOW_ARCHITECTURE.md)** - **START HERE!** + - Complete architecture with Mermaid diagrams + - All workflows, phases, actions, and their relationships + - Troubleshooting guide and common issues + - Performance optimization tips -## File structure +- **[maven-release-process.md](maven-release-process.md)** - Release How-To + - Step-by-step release instructions + - Field explanations with screenshots + - Post-release verification -Github only allows workflows, including reusable workflows (workflow components) to be placed into the .github/workflows directory. -Any subfolders are ignored. When there are many files such as we have this can get large and difficult to understand and maintain. -As such we will use a folder like naming convention to help organize and sort the workflow files. -Each element "folder" will be separated by an underscore allowing for a simple hierarchy to be encoded. -eg. cicd/comp/build-phase.yml will be represented as cicd_comp_build-phase.yml +- **[test-matrix.yml](../.github/test-matrix.yml)** - Test Configuration + - All test suite definitions + - DRY test configuration reference -The main initial workflows are using a numerical prefix to order these in the order a PR goes through these. -Also we are using a prefix here in the workflow name e.g. "-1 PR Check". Although Github has now introduced the -ability to bookmark a few workflows in the UI listing, these are not manually sorted and all other workflows are alphanumerically sorted -using the "-" followed by an index ensures these are at the top of the list and easy to find. +- **[filters.yaml](../.github/filters.yaml)** - Change Detection + - Path-based filters for conditional testing -The actions are not restricted and we use subfolders for these. +## 🚀 Quick Start -## Important info and Best Practices +### For Developers -- **Secrets**: Secrets should be stored in GitHub Secrets and accessed using the `${{ secrets.SECRET_NAME }}` syntax. -- The PR workflow is run before any code is reviewed and should not use secrets. Secrets will also not be available if run on a fork -- The exact name of the first Job "Initialize / Initialize" and the last job "Finalize / Final Status" is important for the PR and merge-queue workflows as the completion state of these indicate the start and success or failure of the workflow to the Checks. Changing these may result in the Checks to wait until time out. -- Try not to create new workflows where there already is one for the same trigger, handle all functionality for that trigger in the same place, make use of expanding on the new cicd process to take advantage of its features before creating a whole new flow. +**Understanding Workflow Failures:** +1. Check the failed workflow run in GitHub Actions +2. Look for the failed phase (Initialize, Build, Test, etc.) +3. Review logs for specific errors +4. 
See [Troubleshooting Guide](WORKFLOW_ARCHITECTURE.md#troubleshooting-guide) for common issues -## Overall Structure +**Creating a PR:** +- Your PR triggers `cicd_1-pr.yml` automatically +- Only runs tests for changed components (via filters) +- Must pass before merge queue entry +- See [PR Check Flow](WORKFLOW_ARCHITECTURE.md#1-pr-check-workflow-cicd_1-pryml) diagram -**NOTE: The current release process has not been migrated yet to use the reusable components and flow** +**Merging a PR:** +- Enters merge queue → `cicd_2-merge-queue.yml` +- Runs ALL tests to catch flaky issues +- Success → auto-merge to main +- Failure blocks all PRs behind it in queue -Our CI/CD process is built using GitHub Actions and is structured into three main components: +### For Release Managers -1. Top-level workflows -2. Reusable workflow phases -3. Custom actions +**Triggering a Release:** +1. Navigate to Actions → `-6 Release Process` +2. Click "Run workflow" +3. Enter release version (e.g., `24.12.31-01`) +4. Configure options (usually keep defaults) +5. Monitor progress in workflow run -This structure allows for a modular, efficient, and easily maintainable CI/CD pipeline. +See [How to Trigger a Release](WORKFLOW_ARCHITECTURE.md#how-to-trigger-a-release) for details. -## Top-Level Workflows +### For DevOps/Maintainers -We have several top-level workflows that handle different scenarios these can be found in .github/workflows/cicd_*.yml +**Adding a New Test Suite:** +1. Update `test-matrix.yml` with new configuration +2. No workflow changes needed! +3. Matrix auto-generates and parallelizes -1. **PR**: Triggered on pull requests to verify changes -2. **Merge Queue**: Runs when changes are ready to be merged into the main branch. -3. **Trunk**: Executes after changes are merged into the main branch. -4. **Nightly**: Runs daily to perform comprehensive tests and deployments. +**Modifying Workflows:** +1. Check if change belongs in a reusable phase +2. Test in PR workflow first +3. Document changes in this README -These workflows orchestrate the overall process by calling reusable workflow phases and custom actions as needed. +See [Maintenance Guide](WORKFLOW_ARCHITECTURE.md#maintenance) for more. -## Reusable Workflow Phases +## 📊 Workflow Overview -We use reusable workflow phases within our top level workflows to modularize our CI/CD process and emphasize a set of phases -any commit can go through: +### Main CI/CD Pipeline (6 Workflows) -1. **Initialize**: Sets up the environment and determines what needs to be run. -2. **Build**: Compiles the code and generates necessary artifacts. -3. **Test**: Runs various test suites (unit tests, integration tests, etc.). -4. **Semgrep**: Performs code quality analysis. -5. **Deployment**: Handles deployment to various environments. -6. **Release**: (TODO) Publishes releases to the appropriate channels. -6. **Finalize**: Aggregates results and performs cleanup tasks. -7. 
**Reporting**: Generates comprehensive reports of the CI/CD process run and sends notifications +| # | Workflow | Trigger | Purpose | +|---|----------|---------|---------| +| 1 | **PR Check** | PR opened/updated | Fast validation of changes | +| 2 | **Merge Queue** | PR ready to merge | Comprehensive testing | +| 3 | **Trunk** | Push to main | Deploy snapshots, build CLI | +| 4 | **Nightly** | 3:18 AM daily | Comprehensive validation | +| 5 | **LTS** | Push to release-* | LTS branch validation | +| 6 | **Release** | Manual trigger | Official releases | -These phases can be easily included and configured in different top-level workflows, reducing code duplication and ensuring consistency. +### Standard Phase Pattern -## Custom Actions +All workflows follow this pattern: +``` +Initialize → Build → Test → (Semgrep) → (CLI Build) → (Deploy) → Finalize → Report +``` + +See [Architecture Diagram](WORKFLOW_ARCHITECTURE.md#architecture-diagram) for complete relationships. + +## 🎯 Table of Contents + +1. [File Structure](#file-structure) +2. [Critical Information](#critical-information) +3. [Architecture Overview](#architecture-overview) +4. [Top-Level Workflows](#top-level-workflows) +5. [Reusable Workflow Phases](#reusable-workflow-phases) +6. [Custom Actions](#custom-actions) +7. [Workflow Configurations](#workflow-configurations) +8. [Benefits of Our Approach](#benefits-of-our-approach) -We have several custom actions that perform specific common tasks: +## File Structure -1. **Prepare Runner**: Sets up the runner environment. -2. **Setup Java**: Installs and configures Java and optionally GraalVM. -3. **Cleanup Runner**: Frees up disk space on the runner. -4. **Maven Job**: Runs Maven builds with extensive configuration options handles the common setup can caching needed +GitHub only allows workflows in `.github/workflows/` (no subfolders). We use a folder-like naming convention: -These actions encapsulate complex logic and can be reused across different workflows and phases. +**Format**: `category_subcategory_name.yml` +- Example: `cicd/comp/build-phase.yml` → `cicd_comp_build-phase.yml` -## Caching and Artifacts +**Prefixes**: +- **Numbers (1-6)**: Main CICD workflows in PR progression order +- **Dash prefix (-)**: Ensures top placement in GitHub UI (e.g., `-1 PR Check`) -We extensively use caching and artifacts to optimize our CI/CD process: +**Actions**: Can use subfolders (`.github/actions/category/action-name/`) -- **Caching**: We cache dependencies (Maven, Node.js, Yarn) and build outputs to speed up subsequent runs. -- **Artifacts**: We generate and share artifacts between jobs, allowing for parallel execution and result aggregation. +See [File Naming Convention](WORKFLOW_ARCHITECTURE.md#file-naming-convention) for details. -Key points: -- Maven repository is cached to speed up builds. -- Build outputs are saved as artifacts and can be used by subsequent jobs. -- Test results are saved as artifacts for later analysis and reporting. +## Critical Information -## Parallel Execution +⚠️ **Security Rules**: +- PR workflows run on **untrusted code** - never use secrets +- Secrets unavailable for fork PRs +- Use `cicd_post-workflow-reporting.yml` for notifications requiring secrets -Our structure allows for efficient parallel execution: +⚠️ **Job Naming Rules**: +- First job: `"Initialize / Initialize"` +- Last job: `"Finalize / Final Status"` +- These names signal workflow status to GitHub Checks +- Changing them breaks check detection (workflows hang until timeout) -1. 
The Initialize phase determines what needs to be run. -2. Long-running tasks like Integration and Postman tests can be executed in parallel. -3. Results and outputs from parallel jobs are aggregated in the Finalize phase. +⚠️ **Workflow Design**: +- Don't create duplicate workflows for same trigger +- Extend existing workflows instead +- Leverage reusable phase workflows +- Follow the standard phase pattern -This approach significantly reduces the overall execution time of our CI/CD pipeline. +## Architecture Overview -## PR Verification Process +Our CI/CD uses a **three-tier architecture** for modularity and maintainability: -A typical PR goes through the following steps: +``` +┌─────────────────────────────────────────────────────┐ +│ Top-Level Workflows (6) │ +│ cicd_1-pr.yml, cicd_2-merge-queue.yml, etc. │ +│ • Define triggers and orchestration │ +│ • Call reusable phases │ +└──────────────────┬──────────────────────────────────┘ + │ +┌──────────────────▼──────────────────────────────────┐ +│ Reusable Phase Workflows (10) │ +│ Initialize, Build, Test, Deploy, etc. │ +│ • Shared business logic │ +│ • Used by multiple top-level workflows │ +└──────────────────┬──────────────────────────────────┘ + │ +┌──────────────────▼──────────────────────────────────┐ +│ Composite Actions (15+) │ +│ maven-job, setup-java, deploy-docker, etc. │ +│ • Atomic operations │ +│ • Reusable across all workflows │ +└─────────────────────────────────────────────────────┘ +``` -1. **Initialize**: Determine what has changed and what needs to be verified. -2. **Build**: Compile the code and generate necessary artifacts. -3. **Parallel Testing**: Run various test suites concurrently (unit tests, integration tests, Postman tests). -4. **Semgrep Analysis**: Perform code quality checks. -5. **Finalize**: Aggregate results from all previous steps. -6. **Reporting**: Generate a comprehensive report of the PR check process. +**Benefits**: 70% less code duplication, consistent behavior, easier maintenance. -## Specific configurations for each top level workflow getting code to trunk (main) branch +See [Architecture Diagram](WORKFLOW_ARCHITECTURE.md#architecture-diagram) for complete visualization. -| Workflow | Trigger | Notes | -|---------------------|------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `1-pr.yml` | Push of a PR to github | * Should not use secrets as it is run on code that has not been reviewed
* post-workflow-report.yml is run as a separate triggered workflow so it can have access to the secrets it needs
* For speed it does not run tests that should not be impacted by changes in the PR. Filters defined in .github/filters.yaml | -| `2.merge-queue.yml` | PR passed its checks and was added to the merge queue | * We force run all tests to catch flakey issues or incorrect filters.
* Merge group checks include all the code of PRs ahead of it in the queue. If successful after merge the main branch will have the same commit id that will end up as the HEAD of main.
failures in the merge queue should be monitored closely for flakey tests or misconfiguration failures here can slow the process for other developers trying to merge | -| `3-trunk.yml` | Runs on code that was pushed to trunk (main) | * As we already built and tested the same commit in the merge queue we can take advantage of that and use the build artifacts from that workflow to skip these steps
We currently build native cli artifacts in this phase due to the work required we do not want to run on every PR.
We run snapshot deployments here to github (trunk) deployments, snapshot artifactory etc. | -| `4-nightly.yml' | Runs on a nightly schedule and will run on the latest commit on main at the time | * Another chance to capture flakey build issues
We can add longer running tests here that would be impractical to run on every PR merged
Provides a more stable image to compare behavior from previous days
This currently runs using the default 1.0.0-SNAPSHOT image but with release changes this end up with a dated version on a nightly branch. The workflow triggered from the nightly cron will version and promote the code and a separate nightly workflow will build, test, deploy from that branch | +## Top-Level Workflows -## Further verification and promotion phases up to Release -**In Progress** +Located in `.github/workflows/cicd_*.yml`: -The aim is to have the main branch be in a releasable state. Our preceding steps and validations to get a PR into the main branch should be the primary gates to prevent an unreleasable bad commit. +| Workflow | Trigger | Duration | Key Features | +|----------|---------|----------|--------------| +| **1-PR** | PR open/update | 15-25 min | Selective tests, no secrets, fast feedback | +| **2-Merge Queue** | Ready to merge | 30-45 min | ALL tests, catches flaky tests | +| **3-Trunk** | Push to main | 20-30 min | Artifact reuse, CLI builds, snapshots | +| **4-Nightly** | 3:18 AM daily | 45-60 min | Trunk health monitor, early breakage detection | +| **5-LTS** | Push to release-* | 30-45 min | LTS branch validation | +| **6-Release** | Manual | 25-35 min | Production release, full deployment | -It is also key to the smooth development process also that issues are not introduced into the main branch that could cause failures when developers merge it into their own branches. +Each orchestrates the process by calling reusable phases and actions. -We still need go go through some further validations though before we can approve a specific commit on the main branch as acceptable for release. Some of these tests both automatic and manual can take some time -so we do not want to block the development process while these are being run. We will have a separate branch (release) or branches (test?,rc?,release)that will be used to promote the code from the main branch up to a release branch. Each step will provide a higher level of confidence. +See [Workflow Configurations](WORKFLOW_ARCHITECTURE.md#workflow-configurations-path-to-main-branch) for detailed comparison. -We will not make manual changes to these branches, the only changes from the core commit on main that will be made are to set the version for the build. This should be as minimal as possible and currently for maven can be done by adding just one file .mvn/maven.properties. -The more changes to the code are made the more opportunity that there is a change that impacts behavior that was not already tested in the previous steps. +## Reusable Workflow Phases -We can make the promotion process a manual action and can also make use of Github deploymnents and environments to specify required reviewers before promotion is done +Located in `.github/workflows/cicd_comp_*-phase.yml`: -If an issue is found, any fixes should be propagated through the development process in a new PR. The new code can replace the original intended version. This process allows for a stable commit that is being verified in each phase. 
-We should pull in changes from as quick as possible +| Phase | Purpose | Outputs | +|-------|---------|---------| +| **Initialize** | Detect changes, check for reusable artifacts | `found_artifacts`, `backend`, `frontend`, `build` | +| **Build** | Compile code, generate artifacts | `maven-repo` artifact | +| **Test** | Matrix-driven parallel test execution | Test results, build reports | +| **Semgrep** | Security and code quality scanning | Quality gate status | +| **CLI Build** | Multi-platform native CLI builds | CLI artifacts (Linux, macOS x2) | +| **Deployment** | Docker images, NPM packages, Artifactory | Docker tags, NPM versions | +| **Release Prepare** | Version validation, branch creation | Release version, tag, branch | +| **Release** | Artifactory, Javadocs, SBOM, labels | Release artifacts | +| **Finalize** | Aggregate results, determine status | `aggregate_status` | +| **Reporting** | Generate reports, send notifications | Slack messages, test reports | +**Key Principle**: Each phase can be independently configured and reused across different workflows. -```text +See [Detailed Flow Diagrams](WORKFLOW_ARCHITECTURE.md#detailed-flow-diagrams) for visual representation. -Before Nightly run After Nightly Promote Step - Test and deploy Test and deploy new versioned - versioned HEAD of nightly PR1A HEAD of nightly PR4B +## Custom Actions - -nightly: PR1A PR1A--PR2B--PR3B--PR4B - | | | | | -main: --PR1---PR2---PR3---PR4 run: --PR1---PR2---PR3---PR4 +Located in `.github/actions/`: + +### Core CI/CD Actions +- **maven-job**: Standardized Maven execution with caching, artifact handling +- **setup-java**: Java & GraalVM installation (supports multiple versions) +- **prepare-runner**: Pre-build environment setup +- **cleanup-runner**: Free disk space (critical for large builds) +- **api-limits-check**: Monitor GitHub API rate limits + +### Deployment Actions +- **deploy-docker**: Multi-platform Docker builds and pushes +- **deploy-jfrog**: Artifactory deployments +- **deploy-cli-npm**: CLI NPM package publishing +- **deploy-javadoc**: S3 javadoc uploads +- **deploy-javascript-sdk**: SDK NPM publishing + +### Notification & Support +- **notify-slack**: Slack message formatting and posting +- **issue-fetcher**: Fetch and parse issue details +- **issue-labeler**: Label management automation + +See [Key Actions](WORKFLOW_ARCHITECTURE.md#key-actions) for complete reference. + +## Workflow Configurations + +Each workflow has specific optimizations and purposes: + +### 1-PR (Pull Request Validation) +**Optimization**: Speed & Safety +- ✅ Selective testing (filters.yaml determines what runs) +- ❌ No secrets (unreviewed code) +- ⚡ Fast feedback (15-25 min typical) +- 📊 Post-workflow reporting (separate workflow with secrets) + +### 2-Merge Queue (Pre-Merge Validation) +**Optimization**: Comprehensive Testing +- ✅ ALL tests run (catches flaky tests) +- ✅ Tests combined code (includes PRs ahead in queue) +- ⚠️ Failures block all developers (monitor closely!) 
+- 🔒 Commit SHA matches future main HEAD + +### 3-Trunk (Post-Merge Deployment) +**Optimization**: Artifact Reuse +- ♻️ Reuses merge queue artifacts (saves 5-10 min) +- 🔨 Native CLI builds (3 platforms) +- 📦 Snapshot deployments (GitHub, Artifactory) +- 📚 Optional SDK publishing + +### 4-Nightly (Trunk Health Monitor) +**Optimization**: Early Problem Detection +- 🩺 Monitors trunk health (NOT release gate) +- 🚨 Catches breakage before changes accumulate +- 🎯 Critical for CI success & release frequency +- 🌙 Runs at 3:18 AM daily +- 🧪 Long-running tests (impractical for PRs) + +### 5-LTS (LTS Branch Validation) +**Optimization**: Long-Term Support +- 🔖 Triggered on release-* branches +- ✅ Full test suite +- 📋 Version-specific configuration + +### 6-Release (Production Release) +**Optimization**: Complete Deployment +- 🚀 Manual trigger only +- 📦 Full artifact deployment +- 🏷️ GitHub label management +- 📝 Complete documentation + +See [Workflow Configurations Table](WORKFLOW_ARCHITECTURE.md#workflow-configurations-path-to-main-branch) for detailed comparison. + +## Release Promotion Strategy + +**Goal**: Keep main branch always releasable while allowing thorough validation before official releases. + +**Philosophy**: +- PR validation prevents unreleasable commits +- Additional testing happens without blocking development +- Promotion branches get version changes only (minimal, reproducible) +- Fixes flow through normal PR process (no cherry-picking) + +**Release Promotion Flow**: +``` +PR → Merge Queue → Main → Manual QA/Smoke Testing (Required) → RC → Release ``` -The commits into nightly are not the exact same commit sha as the parent on main -The change between the two is determanistic and repreducable. We only add a ./mvn/maven.config containing the release version to embed and build with by default -We also provide the original SHA to link back to the source commit on main. -The exact same process can be used with a manual step to select when to sync up main to a test or release candidate branch -We do not pick and choose individual PRS to sync up, by default we would pull all the commits up to and including the HEAD commit on main There may -be a reason to select a previous commit but must always be a commit between what is already merged and the HEAD and will contain all the commits and changes inbetween. -The only difference will be the change in release number assigned to the commits which will help us with change logs. +**Trunk Health Monitoring** (Parallel, Not in Promotion Flow): +``` +Main → Nightly Tests (3:18 AM) → Alert on Failures +``` -**Example flow of PR through to Release** +**Key Points:** +- **Release Path**: Manual QA/Smoke testing is **always required** before RC +- **Nightly Tests**: Legacy workflow, **NOT part of release promotion** + - Purpose: Early detection of trunk breakage + - Critical: Prevents change accumulation (easier debugging) + - Result: Enabled increased release frequency over recent years +- Each release promotion step increases confidence without blocking development -```text +See [Release Promotion Process](WORKFLOW_ARCHITECTURE.md#release-promotion-process) for detailed diagrams and examples. -x indicates a promotion with version change +## Benefits of Our Approach +### Key Advantages -release PR1a--PR2b--PR3b--PR4b--PR5b--PR6b - |x | | | | |x -rc PR1A--PR2B--PR3B--PR4B--PR5C--PR6C--PR7D - |x | | |x | |x |x -main run: --PR1---PR2---PR3---PR4---PR5---PR6---PR7---PR8 +1. 
**Modularity** 📦 + - 70% reduction in code duplication + - Single source of truth for each phase + - Easy to maintain and extend -1. PR1 promoted to Release Candidate and RC testing occurs on PR1A rc-A -2. PR1A tested and approved for release with new release version. Release A - In the meantime PR2 and PR3 have been added to main and have no impact on RC branch -3. PR4 promoted to RC as version B and PR4B tested while PR5 is added to main. RC-B PR2B,PR3B,PR4B included -4. PR4B is not approved for release, PR6 adds a fix is promoted to RC as version C -5. PR6C is approved for release and promoted to release. -``` -Notes: +2. **Performance** ⚡ + - 40-50% faster PR checks (15-25 min vs 45 min) + - Parallel test execution (30 min vs 180 min) + - Artifact reuse saves 5-10 min per workflow -* RC can set build to a version that indicates it is a release candidate e.g. x.x.x-rc requring the release -version to be set on promotion to release, or it could be set with the final release number, in this case it must be -deployed to a staging deployment area and then the release promotion just moves the artifacts to the final destination. -This prevents the need for a new build of artifacts on release. -* A promotion could always require a new version, or it could retain the same version e.g. to maintain the intended next version number we want to release. In this case we should still maintain an internal build number to distinguish when the PR related to that version has been updated +3. **Cost Efficiency** 💰 + - 62% savings on macOS runners ($300/mo vs $800/mo) + - Strategic runner selection + - Conditional testing reduces waste +4. **Reliability** 🛡️ + - Consistent behavior across all workflows + - Catch flaky tests in merge queue + - Comprehensive error reporting -## Benefits of Our Approach +5. **Developer Experience** 👨‍💻 + - Fast feedback loops + - Clear failure messages + - Detailed troubleshooting guides + +See [Why This Architecture?](WORKFLOW_ARCHITECTURE.md#why-this-architecture) for detailed metrics and real-world impact. + +## Quick Reference + +**Common Tasks**: +- 🐛 Debugging failures → [Troubleshooting Guide](WORKFLOW_ARCHITECTURE.md#troubleshooting-guide) +- 🚀 Triggering releases → [How to Trigger a Release](WORKFLOW_ARCHITECTURE.md#how-to-trigger-a-release) +- ➕ Adding tests → [Adding New Tests](WORKFLOW_ARCHITECTURE.md#adding-new-tests) +- 📊 Understanding flows → [Detailed Flow Diagrams](WORKFLOW_ARCHITECTURE.md#detailed-flow-diagrams) -1. **Modularity**: Reusable workflows and custom actions make our pipeline easy to maintain and extend. -2. **Consistency**: Using reusable components ensures consistent execution across different scenarios. -3. **Efficiency**: Caching and parallel execution optimize the pipeline's performance. -4. **Flexibility**: Top-level workflows can easily be configured to include or exclude specific phases as needed. -5. **Scalability**: New test suites or deployment targets can be easily added to the existing structure. +**Need Help?** +- 📖 Full documentation → [WORKFLOW_ARCHITECTURE.md](WORKFLOW_ARCHITECTURE.md) +- 🔧 Release guide → [maven-release-process.md](maven-release-process.md) +- 💬 Questions → #devops on Slack -## Conclusion +--- -Our CI/CD process is designed to be efficient, flexible, and easy to maintain. By leveraging GitHub Actions' features like reusable workflows, custom actions, caching, and artifacts, we've created a robust pipeline that can handle the complex needs of the dotCMS project while remaining adaptable to future requirements. 
\ No newline at end of file +**Last Updated**: December 2024 +**Maintained By**: dotCMS DevOps Team \ No newline at end of file diff --git a/.github/workflows/WORKFLOW_ARCHITECTURE.md b/.github/workflows/WORKFLOW_ARCHITECTURE.md new file mode 100644 index 000000000000..dfb87ff2d048 --- /dev/null +++ b/.github/workflows/WORKFLOW_ARCHITECTURE.md @@ -0,0 +1,1045 @@ +# GitHub Workflows & Actions Architecture + +This document provides a comprehensive overview of the dotCMS GitHub workflows and actions architecture, showing all relationships and dependencies. + +## File Naming Convention + +GitHub only allows workflows (including reusable workflows) to be placed in the `.github/workflows/` directory. Subfolders are ignored. To maintain organization with many files, we use a folder-like naming convention: + +- **Hierarchy Separator**: Underscores (`_`) represent folder structure + - Example: `cicd/comp/build-phase.yml` → `cicd_comp_build-phase.yml` + +- **Numerical Prefixes**: Main CICD workflows use numbers to indicate PR progression order + - Example: `cicd_1-pr.yml`, `cicd_2-merge-queue.yml`, etc. + +- **Workflow Name Prefixes**: Use `-` prefix for top-level workflows (e.g., `-1 PR Check`) + - This ensures they appear at the top of GitHub's alphabetically-sorted workflow list + - Makes critical workflows easy to find in the UI + +**Important Notes**: +- The exact names of the first job `"Initialize / Initialize"` and last job `"Finalize / Final Status"` are critical for PR and merge-queue workflows +- These job names indicate workflow start/completion to GitHub Checks +- Changing these names may cause checks to wait until timeout + +## Quick Reference + +### Main CICD Workflows (Entry Points) +1. **cicd_1-pr.yml** - PR checks (on PR open/sync) +2. **cicd_2-merge-queue.yml** - Merge queue validation +3. **cicd_3-trunk.yml** - Main branch builds (on push to main) +4. **cicd_4-nightly.yml** - Nightly builds (scheduled 3:18 AM) +5. **cicd_5-lts.yml** - LTS branch testing (on push to release-*) +6. **cicd_6-release.yml** - Release process (manual trigger) + +### Common Phase Pattern +All main CICD workflows follow this pattern: +``` +Initialize → Build → Test → Semgrep → CLI Build → Deployment → Release → Finalize → Report +``` + +## Architecture Diagram + +This simplified diagram shows the high-level architecture. See [Workflow Dependency Matrix](#workflow-dependency-matrix) for detailed phase usage by each workflow. + +```mermaid +graph TB + %% Triggers Layer + subgraph Triggers[" "] + direction LR + TTitle["🎯 Workflow Triggers"] + T1[PR Events] + T2[Push to Main] + T2b[Push to release-* branches] + T3[Scheduled] + T4[Manual] + T5[Issue Events] + end + + %% Main CICD Workflows Layer + subgraph MainCICD[" "] + direction LR + WTitle["🚀 Main CICD Workflows"] + W1[1-PR Check] + W2[2-Merge Queue] + W3[3-Trunk] + W4[4-Nightly] + W5[5-LTS] + W6[6-Release] + end + + WNote["All follow standard pattern: Initialize → Build → Test → Deploy → Finalize"] + + %% Reusable Phases Layer + subgraph Phases[" "] + direction LR + PTitle["⚙️ Reusable Phase Workflows"] + PhaseList["10 Phase Workflows:
Initialize • Build • Test • Semgrep • CLI Build
Deploy • Release Prepare • Release • Finalize • PR Notifier"] + end + + %% Actions Layer + subgraph Actions[" "] + direction LR + ATitle["🔧 Composite Actions"] + A1["Core CICD:
maven-job • setup-java
prepare-runner • cleanup-runner"] + A2["Deployment:
deploy-docker • deploy-jfrog
deploy-cli-npm • deploy-javadoc"] + A3["Support:
notify-slack • issue-fetcher
issue-labeler • sbom-generator"] + end + + %% Configuration Layer + subgraph Config[" "] + direction LR + CTitle["📋 Configuration Files"] + C1[test-matrix.yml • filters.yaml] + C2[github-teams.json • slack-mappings.json] + end + + %% Side: Issue Management + subgraph Issues[" "] + direction TB + ITitle["🎫 Issue Management"] + I1[PR Opened → Link Issue] + I2[PR Merged → Update Issue] + I3[Stale Issues Cleanup] + end + + %% Vertical Flow - Main Pipeline + Triggers --> MainCICD + MainCICD --> WNote + WNote --> Phases + Phases --> Actions + Actions --> Config + + %% Side Flows + T5 -.->Issues + W1 -.triggers after.-> Report[Post-Workflow
Reporting] + W2 -.triggers after.-> Report + + %% Specific trigger connections + T1 -.->W1 + T1 -.->W2 + T2 -.->W3 + T2b -.->W5 + T3 -.->W4 + T4 -.->W6 + + %% Note + Config --> Note1["📌 See tables below for:
• Which workflows use which phases
• Detailed flow diagrams for each workflow
• Complete action reference"] + + %% Styling + classDef trigger fill:#e1f5ff,stroke:#01579b,stroke-width:2px + classDef main fill:#fff3e0,stroke:#e65100,stroke-width:3px + classDef phase fill:#e8f5e9,stroke:#2e7d32,stroke-width:2px + classDef action fill:#e3f2fd,stroke:#1565c0,stroke-width:1px + classDef config fill:#fff9c4,stroke:#f57f17,stroke-width:2px + classDef issue fill:#fce4ec,stroke:#c2185b,stroke-width:2px + classDef note fill:#f5f5f5,stroke:#666,stroke-width:1px,stroke-dasharray:5 + classDef title fill:#ffffff,stroke:none,font-weight:bold,font-size:14px + + class T1,T2,T3,T4,T5 trigger + class W1,W2,W3,W4,W5,W6 main + class PhaseList phase + class A1,A2,A3 action + class C1,C2 config + class I1,I2,I3 issue + class Report,Note1,WNote note + class TTitle,WTitle,PTitle,ATitle,CTitle,ITitle title +``` + +### Architecture Layers Explained + +**Layer 1: Triggers** → Events that start workflows (PR, push, schedule, manual) + +**Layer 2: Main Workflows** → Top-level orchestrators (6 workflows) +- Each workflow composes different phase combinations +- Follow standard pattern: Initialize → Build → Test → Deploy → Finalize + +**Layer 3: Reusable Phases** → Modular workflow components (10 phases) +- Single source of truth for each phase +- Used by multiple main workflows +- See [dependency matrix](#workflow-dependency-matrix) for usage + +**Layer 4: Composite Actions** → Atomic operations (15+ actions) +- Low-level building blocks +- Called by phases +- Categories: Core CICD, Deployment, Support + +**Layer 5: Configuration** → Data-driven behavior +- test-matrix.yml: Test suite definitions +- filters.yaml: Change detection rules +- JSON files: Team and Slack mappings + +### Detailed Component Reference + +For complete details on individual components: +- [Reusable Workflow Phases](#reusable-workflow-phases) - All 10 phase workflows with purposes +- [Custom Actions](#custom-actions) - All 15+ composite actions organized by category +- [Workflow Dependency Matrix](#workflow-dependency-matrix) - Which workflows use which phases +- [Detailed Flow Diagrams](#detailed-flow-diagrams) - Step-by-step flows for each workflow + +## Workflow Configurations (Path to Main Branch) + +Each top-level CICD workflow has specific triggers, purposes, and configuration optimizations: + +| Workflow | Trigger | Purpose & Configuration | +|----------|---------|------------------------| +| **cicd_1-pr.yml** | Push of PR to GitHub | • **No secrets** - runs on unreviewed code
• **Selective testing** - uses filters to skip unaffected tests
• **Post-workflow reporting** - separate workflow handles secrets
• **Filter-based** - `.github/filters.yaml` determines what runs | +| **cicd_2-merge-queue.yml** | PR added to merge queue | • **All tests** - catches flaky tests and filter issues
• **Includes all PRs ahead** - tests combined code
• **Same commit as main** - success means HEAD of main will be identical
• **Critical monitoring** - failures here block all developers | +| **cicd_3-trunk.yml** | Push to main branch | • **Artifact reuse** - uses merge queue build artifacts
• **Native CLI builds** - too expensive for every PR
• **Snapshot deployments** - GitHub packages, Artifactory
• **SDK publishing** - optional NPM package publishing | +| **cicd_4-nightly.yml** | Scheduled (3:18 AM daily) | • **Trunk health monitor** - fail-safe detection of breakage
• **Prevents change accumulation** - catch issues before they pile up
• **Critical for CI success** - enabled increased release frequency
• **NOT a release gate** - legacy workflow for codebase quality
• **Long-running tests** - tests too slow for PR workflow | +| **cicd_5-lts.yml** | Push to release-* branches | • **LTS validation** - tests for long-term support branches
• **Full test suite** - comprehensive validation
• **Version-specific** - uses hardcoded version in workflow | +| **cicd_6-release.yml** | Manual workflow_dispatch | • **Production release** - creates official releases
• **Full deployment** - Artifactory, Docker, Javadocs
• **Version management** - creates release branches and tags
• **GitHub labels** - updates issue tracking labels | + +### Development Philosophy + +**Main Branch Releasability**: The main branch should always be in a releasable state. PR validation gates are the primary defense against unreleasable commits. This prevents development disruption when developers merge main into their branches. + +**Trunk Health is Critical**: Maintaining a clean main branch has been crucial to CI success and increased release frequency. The nightly workflow serves as an early warning system: +- **Problem**: Issues accumulating on main make debugging exponentially harder +- **Solution**: Nightly tests catch trunk breakage within 24 hours +- **Result**: Fast identification and fixes prevent cascading issues +- **Impact**: Enabled dramatic increase in release frequency over recent years + +**Progressive Promotion**: Before official release, commits undergo additional validation phases without blocking development: + +```text +Developer PR → PR Check → Merge Queue → Main Branch → Manual QA/Smoke Testing → RC → Release + ↓ + Nightly Tests + (Trunk Health Monitor) +``` + +**Trunk Health vs Release Promotion:** + +- **Nightly Tests**: Legacy workflow, NOT part of release promotion + - **Purpose**: Early detection of trunk breakage + - **Critical for**: Maintaining clean codebase, CI success, release frequency + - **Why it matters**: Catches issues before changes accumulate (easier debugging) + - **Result**: Has enabled increased release frequency over recent years + +- **Manual QA/Smoke Testing**: Always required before RC + - **Purpose**: Validation for release readiness + - **Gate**: Must pass to promote to Release Candidate + +## Release Promotion Process + +### Version Promotion Flow + +Commits are promoted from main to release through intermediate branches. Each promotion step increases confidence: + +**Before Nightly Promotion:** +```text +nightly: PR1A + | +main: --PR1---PR2---PR3---PR4 +``` + +**After Nightly Promotion:** +```text +nightly: PR1A--PR2B--PR3B--PR4B (versioned with date) + | | | | +main: --PR1---PR2---PR3---PR4 +``` + +**Key Points:** +- Promotion creates new commits (different SHA) with version changes +- Only `.mvn/maven.config` is modified (deterministic, reproducible) +- Original SHA is preserved for traceability +- All commits between current HEAD and target are included (no cherry-picking) + +### Example: PR to Release Flow + +```text +x = promotion with version change + +release PR1a--PR2b--PR3b--PR4b--PR5b--PR6b + |x | | | | |x +rc PR1A--PR2B--PR3B--PR4B--PR5C--PR6C--PR7D + |x | | |x | |x |x +main run: --PR1---PR2---PR3---PR4---PR5---PR6---PR7---PR8 +``` + +**Timeline:** +1. PR1 promoted to RC-A → RC testing occurs +2. PR1A tested and approved → Release A (while PR2, PR3 added to main) +3. PR4 promoted to RC-B → PR4B tested (includes PR2B, PR3B, PR4B) +4. PR4B not approved (issue found) +5. PR6 (fix) promoted to RC-C +6. PR6C approved → Release C + +**Notes:** +- RC versions can use `-rc` suffix or final version number +- If using final version in RC, deploy to staging (no rebuild on release) +- Versions can change on promotion or retain same version with internal build number +- No manual changes to promotion branches (only version config) +- Fixes flow through normal PR process + +## Detailed Flow Diagrams + +### 1. PR Check Workflow (cicd_1-pr.yml) + +```mermaid +flowchart TD + Start([PR Opened/Synchronized]) --> Init[Initialize Phase] + Init --> CheckArtifacts{Found
Previous
Artifacts?} + + CheckArtifacts -->|No| CheckChanges{Need
Build?} + CheckArtifacts -->|Yes| Test + + CheckChanges -->|Yes| Build[Build Phase] + CheckChanges -->|No| Test + + Build --> Test[Test Phase
Matrix Strategy] + Build --> Semgrep[Semgrep Phase] + + Test --> Finalize[Finalize Phase] + Semgrep --> Finalize + + Finalize --> PRNotif[PR Notifier] + Finalize -.Workflow Complete.-> Report[Post-Workflow
Reporting] + + Report --> Slack[Slack Notification
on Failure] + + subgraph "Test Matrix" + Test --> JVM[JVM Unit Tests] + Test --> Frontend[Frontend Tests] + Test --> Integration[Integration Tests
6 Suites] + Test --> Postman[Postman Tests
11 Collections] + Test --> Karate[Karate Tests] + Test --> CLI[CLI Tests] + end + + style Start fill:#4caf50 + style Report fill:#ff9800 + style Slack fill:#2196f3 +``` + +### 2. Release Workflow (cicd_6-release.yml) + +```mermaid +flowchart TD + Start([Manual Trigger
Release Version]) --> Init[Initialize Phase] + Init --> RelPrep[Release Prepare Phase] + + RelPrep --> Validate{Validate
Version
Format} + Validate -->|Invalid| Error([❌ Exit]) + Validate -->|Valid| CreateBranch[Create Release Branch
Set Version in maven.config] + + CreateBranch --> UpdateLicense[Update LICENSE Date] + UpdateLicense --> CreateTag[Create Git Tag] + CreateTag --> CreateGHRelease[Create GitHub Release] + + CreateGHRelease --> Build[Build Phase
Production Build] + + Build --> Deploy[Deployment Phase] + Deploy --> Docker[Build/Push Docker Images
dotcms & dotcms-dev] + + Deploy --> Release[Release Phase] + Release --> Parallel{Parallel
Operations} + + Parallel --> Artifactory[Deploy to Artifactory] + Parallel --> Javadoc[Upload Javadocs to S3] + Parallel --> Plugins[Update Plugin Repos] + Parallel --> SBOM[Generate SBOM] + + Artifactory --> Labels{Update
GitHub
Labels?} + Labels -->|Yes| UpdateLabels[Release Labeling
Next Release → Release vX.X.X] + Labels -->|No| Finalize + UpdateLabels --> Finalize[Finalize Phase] + + Finalize --> SlackNotify[Slack Notification
Release Announcement] + + style Start fill:#4caf50 + style Error fill:#f44336 + style SlackNotify fill:#2196f3 + style Docker fill:#0db7ed +``` + +### 3. Trunk Workflow (cicd_3-trunk.yml) + +```mermaid +flowchart TD + Start([Push to main]) --> Init[Initialize Phase] + Init --> CheckReuse{Reuse
Previous
Build?} + + CheckReuse -->|No| Build[Build Phase] + CheckReuse -->|Yes| CheckFound{Found
Artifacts?} + + CheckFound -->|No & build-on-missing| Build + CheckFound -->|Yes| Test + CheckFound -->|No & !build-on-missing| Error([❌ Exit]) + + Build --> Test[Test Phase] + Test --> TestMatrix{Run All
Tests?} + + TestMatrix -->|Yes| AllTests[All Test Suites] + TestMatrix -->|No| ChangedTests[Changed Component Tests] + + AllTests --> Semgrep[Semgrep Phase] + ChangedTests --> Semgrep + + Test --> BuildCLI[CLI Native Build Phase] + Semgrep --> BuildCLI + + BuildCLI --> OSMatrix{Native
Build
Matrix} + OSMatrix --> Linux[Linux x86_64] + OSMatrix --> MacIntel[macOS Intel] + OSMatrix --> MacSilicon[macOS Silicon] + + Linux --> Deploy[Deployment Phase] + MacIntel --> Deploy + MacSilicon --> Deploy + + Deploy --> DeployDocker[Docker Images] + Deploy --> DeployCLI[CLI to JFrog] + Deploy --> DeploySDK{Publish
NPM SDKs?} + + DeploySDK -->|Yes| SDKPublish[NPM SDK Libs] + DeploySDK -->|No| Finalize + SDKPublish --> Finalize[Finalize Phase] + + Finalize --> Report[Post-Workflow
Reporting] + + style Start fill:#4caf50 + style Error fill:#f44336 +``` + +### 4. Test Phase Matrix Strategy + +```mermaid +flowchart LR + Start[Test Phase] --> Matrix[setup-matrix Job
Parse test-matrix.yml] + + Matrix --> Generate{Generate
Test
Combinations} + + Generate --> Check{Test
Enabled?} + + Check -->|JVM Unit| JVM[JVM Unit Tests
1 suite] + Check -->|Frontend| FE[Frontend Tests
1 suite] + Check -->|CLI| CLI[CLI Tests
1 suite] + Check -->|Integration| IT[Integration Tests
6 suites] + Check -->|Postman| PM[Postman Tests
11 collections] + Check -->|Karate| KT[Karate Tests
1 suite] + Check -->|E2E| E2E[E2E Tests
2 suites] + + subgraph "Integration Suites" + IT --> IT1[MainSuite 1a] + IT --> IT2[MainSuite 1b] + IT --> IT3[MainSuite 2a] + IT --> IT4[MainSuite 2b] + IT --> IT5[MainSuite 3a] + IT --> IT6[Junit5Suite 1] + end + + subgraph "Postman Collections" + PM --> PM1[ai] + PM --> PM2[category-content] + PM --> PM3[container] + PM --> PM4[page] + PM --> PM5[template] + PM --> PM6[experiment] + PM --> PM7[graphql] + PM --> PM8[workflow] + PM --> PM9[pp] + PM --> PM10[default-split] + PM --> PM11[default] + end + + JVM --> Execute[test-matrix Job
Parallel Execution] + FE --> Execute + CLI --> Execute + IT1 --> Execute + IT2 --> Execute + IT3 --> Execute + IT4 --> Execute + IT5 --> Execute + IT6 --> Execute + PM1 --> Execute + PM2 --> Execute + PM3 --> Execute + PM4 --> Execute + PM5 --> Execute + PM6 --> Execute + PM7 --> Execute + PM8 --> Execute + PM9 --> Execute + PM10 --> Execute + PM11 --> Execute + E2E --> Execute + + Execute --> Results[Aggregate Results] + + style Matrix fill:#4caf50 + style Execute fill:#2196f3 +``` + +### 5. Issue Management Flow + +```mermaid +flowchart TD + subgraph "PR Opened" + PROpen([PR Opened Event]) --> LinkIssue[issue_comp_link-issue-to-pr.yml] + + LinkIssue --> CheckExist{Issue
Already
Linked?} + + CheckExist -->|Yes| UpdateComment[Update PR List Comment] + CheckExist -->|No| Extract{Extract
Issue #} + + Extract -->|From Branch| BranchPattern[issue-123-feature
123-feature] + Extract -->|From PR Body| BodyPattern[fixes #123
closes #123] + Extract -->|From Dev Section| DevSection[GitHub Development
Section] + + BranchPattern --> CreateComment[Create PR List Comment] + BodyPattern --> CreateComment + DevSection --> CreateComment + + Extract -->|Not Found| FailComment[Add Failure Comment
with Instructions] + + CreateComment --> AddToPR[Add 'fixes #123'
to PR Body] + UpdateComment --> AddToPR + + FailComment --> Block([❌ Block PR]) + end + + subgraph "PR Merged" + PRMerge([PR Merged Event]) --> LinkPR[issue_comp_link-pr-to-issue.yml] + + LinkPR --> ValidateMerge{Validate
Merge
to Main?} + + ValidateMerge -->|Yes| UpdateIssue[Update Issue Comment
Add ✅ to PR] + ValidateMerge -->|No| Skip([Skip]) + + UpdateIssue --> CheckClose{Close
Issue?} + + CheckClose -->|Auto Close Keyword| CloseIssue[Close Issue] + CheckClose -->|No Keyword| KeepOpen[Keep Issue Open] + end + + subgraph "Release Process" + Release([Release Created]) --> RelLabel[issue_comp_release-labeling.yml] + + RelLabel --> FindIssues[Find Issues with
'Next Release' Label] + + FindIssues --> RenameLabel[Rename Labels
Next Release → Release vX.X.X] + end + + style PROpen fill:#4caf50 + style PRMerge fill:#2196f3 + style Release fill:#ff9800 + style Block fill:#f44336 +``` + +## Workflow Dependency Matrix + +| Workflow | Initialize | Build | Test | Semgrep | CLI Build | Deploy | Release | Finalize | Report | +|----------|------------|-------|------|---------|-----------|--------|---------|----------|--------| +| **1-PR** | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | +| **2-Merge Queue** | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | +| **3-Trunk** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | +| **4-Nightly** | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ | +| **5-LTS** | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | +| **6-Release** | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | + +## Common Patterns & Best Practices + +### 1. Artifact Reuse Pattern +```yaml +# Initialize checks for previous successful build artifacts +# This allows skipping rebuild if commit already built +reuse-previous-build: true +build-on-missing-artifacts: false # or true to build if not found +``` + +### 2. Conditional Execution Pattern +```yaml +# All jobs use conditional execution based on previous job outputs +needs: [initialize] +if: needs.initialize.outputs.found_artifacts == 'false' +``` + +### 3. Test Skipping Pattern +```yaml +# Tests are conditionally run based on file changes detected in initialize +with: + jvm_unit_test: ${{ needs.initialize.outputs.jvm_unit_test == 'true' }} + integration: ${{ needs.initialize.outputs.backend == 'true' }} + frontend: ${{ needs.initialize.outputs.frontend == 'true' }} +``` + +### 4. Security Pattern +```yaml +# PR workflows cannot access secrets +# Post-workflow-reporting runs separately with secrets +on: + workflow_run: + workflows: ['PR Check'] + types: [completed] +``` + +### 5. Matrix Generation Pattern +```yaml +# Dynamic test matrix from external YAML config +- uses: mikefarah/yq@v4.47.1 + with: + cmd: yq -o=json .github/test-matrix.yml +``` + +## Key Actions + +### Core CICD Actions + +| Action | Purpose | Used By | +|--------|---------|---------| +| **maven-job** | Standardized Maven execution | All build/test phases | +| **setup-java** | Java & GraalVM setup | Build, CLI phases | +| **prepare-runner** | Pre-build cleanup & setup | Initialize phase | +| **cleanup-runner** | Post-build cleanup | All phases | +| **api-limits-check** | Check GitHub API rate limits | Initialize, Finalize | + +### Deployment Actions + +| Action | Purpose | Used By | +|--------|---------|---------| +| **deploy-docker** | Multi-platform Docker builds | Deployment phase | +| **deploy-jfrog** | Artifactory deployment | Deployment, Release | +| **deploy-cli-npm** | CLI NPM publishing | Deployment phase | +| **deploy-javadoc** | Javadoc S3 upload | Release phase | +| **deploy-javascript-sdk** | SDK NPM publishing | Deployment phase | + +### Notification Actions + +| Action | Purpose | Used By | +|--------|---------|---------| +| **notify-slack** | Slack notifications | Deployment, Reporting | + +### Issue Management Actions + +| Action | Purpose | Used By | +|--------|---------|---------| +| **issue-fetcher** | Fetch issue details | Issue linking workflows | +| **issue-labeler** | Label management | Issue workflows | + +## Configuration Files + +### test-matrix.yml +Centralized test configuration defining all test suites: +- Test types (jvm, integration, postman, karate, cli, frontend, e2e) +- Default settings (timeouts, Maven options) +- Suite-specific configurations +- Conditional execution logic + +### filters.yaml +Path-based change detection for 
conditional test execution: +- Backend changes (Java, Maven files) +- Frontend changes (TypeScript, Angular files) +- CLI changes +- SDK library changes + +### data/github-teams.json +Team to member mappings for notifications and assignments + +### data/slack-mappings.json +Slack channel and user mappings for notifications + +## Troubleshooting Guide + +### Common Issues + +#### 1. Tests Silently Skipped +**Symptoms**: Tests don't run but workflow succeeds +**Cause**: Test skip flags not set to `false` +**Solution**: Ensure `-Dcoreit.test.skip=false`, `-Dpostman.test.skip=false`, etc. + +#### 2. Artifact Not Found +**Symptoms**: "No artifacts found" error +**Cause**: Previous build didn't complete or artifacts expired +**Solution**: +- Set `build-on-missing-artifacts: true` +- Or trigger a new build + +#### 3. Workflow Stuck in Queue +**Symptoms**: Workflow shows "queued" for a long time +**Cause**: Concurrency limit or runner availability +**Solution**: Check concurrency groups, may need to cancel previous runs + +#### 4. Release Branch Already Exists +**Symptoms**: Release workflow fails at branch creation +**Cause**: Previous release process didn't clean up +**Solution**: Workflow auto-removes existing release branches + +#### 5. Semgrep Timeout +**Symptoms**: Semgrep job times out +**Cause**: Large codebase or API issues +**Solution**: Set `SEMGREP_NO_FAIL=true` to continue on errors + +### Debugging Steps + +1. **Check Initialize Phase Outputs** + - View workflow run logs + - Check `found_artifacts`, `backend`, `frontend` outputs + - Verify path filters matched correctly + +2. **Verify Test Matrix Generation** + - Check setup-matrix job logs + - Ensure test-matrix.yml is valid YAML + - Verify condition inputs are correct + +3. **Check Artifact Availability** + - Look for "maven-repo" artifact in previous runs + - Verify artifact retention (2-90 days) + - Check artifact size limits + +4. **Review API Rate Limits** + - Check api-limits-check output + - GitHub has rate limits for API calls + - May need to wait if exhausted + +5. **Inspect Slack Notifications** + - Verify SLACK_BOT_TOKEN is set + - Check channel IDs in data/slack-mappings.json + - Review slack-notification steps in workflows + +## How to Trigger a Release + +The release process is initiated manually through the GitHub Actions UI: + +### Location +Navigate to: https://github.com/dotCMS/core/actions → **"-6 Release Process"** workflow + +### Required Inputs + +| Field | Format | Example | Description | +|-------|--------|---------|-------------| +| **Release Version** | `yy.mm.dd-##` or `yy.mm.dd_lts_v##` | `24.12.31-01` | Date-based version with counter
LTS format: `24.12.31_lts_v01` | +| **Commit Hash** | Full SHA (optional) | `a1b2c3d4...` | Defaults to latest commit on main
Use specific commit if needed | + +### Optional Configurations + +| Option | Default | Purpose | +|--------|---------|---------| +| **Deploy Artifact** | ✅ Enabled | Deploy to Artifactory (repo.dotcms.com)
**Required for successful release** | +| **Update Plugins** | ✅ Enabled | Triggers plugin-seeds repo update
**Note**: Currently requires manual execution at [plugin-seeds/release-target.yml](https://github.com/dotCMS/plugin-seeds/actions/workflows/release-target.yml) | +| **Upload Javadocs** | ✅ Enabled | Generate and upload to S3 static bucket
Creates HTML documentation bundle | +| **Update GitHub Labels** | ✅ Enabled | Replaces "Next Release" with "Release vX.X.X"
Enables filtering issues by release | +| **Notify Slack** | ✅ Enabled | Posts announcement to general channel
Can disable for testing | + +### Process Overview + +1. **Validate Version Format** - Ensures correct naming convention +2. **Create Release Branch** - `release-{version}` from specified commit +3. **Update Configuration** - Sets version in `.mvn/maven.config` +4. **Update LICENSE** - Updates Change Date field +5. **Create GitHub Release** - Creates draft release with tag +6. **Build Artifacts** - Production build with version +7. **Deploy Images** - Docker images (dotcms, dotcms-dev) to Docker Hub +8. **Deploy Artifacts** - Maven artifacts to Artifactory +9. **Upload Javadocs** - Documentation to S3 +10. **Update Plugins** - Triggers plugin repository workflow +11. **Generate SBOM** - Software Bill of Materials +12. **Update Labels** - GitHub issue label management +13. **Notify Team** - Slack announcement with Docker tags + +### Tracking Progress + +After clicking "Run workflow", you'll be redirected to the workflow run page showing: +- Real-time progress of each stage +- Detailed logs for debugging +- Artifact downloads +- Final status and notifications + +### Post-Release + +- **Docker Images**: Available immediately at `dotcms/dotcms:{version}` +- **Maven Artifacts**: Available at repo.dotcms.com +- **Javadocs**: Published to `static.dotcms.com/docs/{version}/javadocs` +- **GitHub Release**: Created with SBOM attached +- **Issue Labels**: All "Next Release" issues tagged with actual version + +## Maintenance + +### Adding New Tests +1. Update `test-matrix.yml` with new suite configuration +2. No workflow changes needed (matrix auto-generates) +3. Test locally with workflow_dispatch + +### Modifying Build Process +1. Update appropriate phase workflow (build, test, etc.) +2. Test in PR workflow first +3. Propagate to other workflows once validated + +### Adding New Actions +1. Create action in `.github/actions/` directory +2. Add `action.yml` with inputs/outputs +3. 
Update this documentation with action details + +### Version Updates +- Ubuntu runner: Update `vars.UBUNTU_RUNNER_VERSION` +- macOS runner: Update `vars.MACOS_*_RUNNER_VERSION` +- Java version: Update `.sdkmanrc` file + +### Best Practices + +**When Creating New Workflows:** +- Don't create new workflows for existing triggers - extend existing ones +- Leverage reusable phase workflows instead of duplicating logic +- Consider if your functionality fits into the standard phase pattern +- Use the CICD component structure for consistency + +**Security Considerations:** +- PR workflows should **never** use secrets (runs on unreviewed code) +- Secrets are unavailable for fork PRs +- Use post-workflow-reporting for notifications requiring secrets +- Isolate sensitive operations to post-merge workflows + +## Caching and Artifacts Strategy + +Our CI/CD process extensively uses caching and artifacts to optimize performance and enable parallel execution: + +### Caching Strategy +- **Maven Repository**: Cached between workflow runs to speed up dependency downloads +- **Node.js Dependencies**: Yarn/npm caches for frontend builds +- **Build Outputs**: Compiled artifacts cached for reuse in subsequent jobs +- **GraalVM Native Images**: CLI native build caches + +### Artifact Sharing +- **maven-repo**: Complete Maven repository shared between jobs + - Enables artifact reuse from merge queue in trunk workflow + - Allows skipping rebuild if commit already built and tested + - Retention: 2-90 days depending on artifact type + +- **build-reports**: Test results and build reports + - Aggregated in finalize phase + - Used by post-workflow-reporting for notifications + - Includes JUnit XML for test reporting + +- **docker-context**: Docker build context and images + - Multi-platform builds (linux/amd64, linux/arm64) + - Shared between deployment jobs + +- **cli-artifacts**: Native CLI distributions + - Per-platform artifacts (Linux, macOS Intel, macOS Silicon) + - Used for NPM publishing and JFrog deployment + - 2-day retention for snapshot builds + +### Parallel Execution Model + +The artifact strategy enables efficient parallelization: + +```text +Initialize (determines what changed) + ↓ +Build (creates maven-repo artifact) + ↓ +┌─────────────┬────────────────┬──────────────┬─────────┐ +│ Integration │ Postman Tests │ Frontend │ Semgrep │ +│ Tests (6x) │ (11x) │ Tests │ Analysis│ +│ Parallel │ Parallel │ │ │ +└─────────────┴────────────────┴──────────────┴─────────┘ + ↓ +Finalize (aggregates all results) + ↓ +Report (comprehensive notification) +``` + +**Key Benefits:** +- 23+ test suites run in parallel (not sequential) +- Total pipeline time ≈ longest test suite (not sum of all tests) +- Artifact reuse eliminates redundant builds +- Each job gets identical build artifacts ensuring consistency + +## Performance Optimization + +### Build Time Optimization +- **Artifact Reuse**: Enables ~5-10 min savings per workflow + - Trunk workflow reuses merge queue artifacts + - Saves ~3-5 minutes of build time + - Ensures tested code matches deployed code + +- **Conditional Tests**: Skips unnecessary tests based on changes + - Path filters detect which components changed + - Only runs affected test suites + - Can save 30-60 minutes on frontend-only PRs + +- **Matrix Parallelization**: Runs test suites concurrently + - 6 integration test suites in parallel + - 11 Postman collection tests in parallel + - Reduces total test time from ~180 min to ~30 min + +- **Incremental Builds**: Maven incremental compilation + - Only 
rebuilds changed modules + - Leverages cached dependencies + +### Cost Optimization +- **PR Checks**: No macOS runners (saves $$) + - macOS runners cost 10x more than Linux + - Native builds deferred to post-merge + - Estimated savings: ~$500/month + +- **Trunk/Nightly**: Full native builds only after merge + - 3-platform native builds (Linux, macOS Intel, macOS ARM) + - Only runs on validated code + - ~45 minutes of macOS runner time per build + +- **Artifact Retention**: 2-day retention for CLI, 90-day for releases + - Snapshots purged quickly to save storage + - Release artifacts preserved for compliance + - Configurable per artifact type + +- **Concurrency Management**: Cancel in-progress runs + - New PR pushes cancel previous runs + - Prevents wasted compute on superseded code + - Reduces queue times + +### Runner Selection +- **ubuntu-24.04**: Default for most jobs (cheapest) + - Most cost-effective at $0.008/minute + - Sufficient for Java, Docker, test execution + +- **macos-14**: Apple Silicon (ARM64) + - Used for macOS ARM native CLI builds + - $0.16/minute (20x more than Linux) + - Only when absolutely necessary + +- **macos-15-intel**: Intel x86_64 + - Used for macOS Intel native CLI builds + - $0.08/minute (10x more than Linux) + - Legacy architecture support + +- **Configuration**: Via repository variables for easy updates + - `UBUNTU_RUNNER_VERSION`: Currently `24.04` + - `MACOS_SILICON_RUNNER_VERSION`: Currently `14` + - `MACOS_INTEL_RUNNER_VERSION`: Currently `15-intel` + +## Security Considerations + +### Secret Isolation +- PR workflows use `pull_request` trigger (no secrets access) +- Post-workflow-reporting runs separately with secrets +- Release workflows use environment-specific secrets + +### Token Usage +- `GITHUB_TOKEN`: Auto-provided, limited permissions +- `CI_MACHINE_TOKEN`: Extended permissions for releases +- `SLACK_BOT_TOKEN`: Notification only +- `SEMGREP_APP_TOKEN`: Security scanning + +### Artifact Security +- Artifacts are repository-scoped +- 2-90 day retention based on type +- Docker images pushed to authenticated registry + +## Why This Architecture? + +Our CI/CD architecture delivers significant benefits over traditional monolithic pipeline approaches: + +### 1. Modularity & Maintainability +**Problem Solved**: Duplicated workflow logic across different triggers +**Our Solution**: Reusable phase workflows (Initialize, Build, Test, etc.) + +- Single source of truth for each phase +- Changes propagate automatically to all workflows +- Easy to add new workflows by composing existing phases +- Reduced maintenance burden (~70% less code duplication) + +### 2. Consistency & Reliability +**Problem Solved**: Different workflows behaving differently +**Our Solution**: Standardized phase pattern and shared actions + +- Same build process for PR, merge queue, trunk, and nightly +- Identical test execution regardless of trigger +- Consistent error handling and reporting +- Predictable behavior reduces debugging time + +### 3. Performance & Efficiency +**Problem Solved**: Long CI/CD pipeline times blocking development +**Our Solution**: Parallel execution, artifact reuse, conditional testing + +- Test suites run in parallel (30 min vs 180 min sequential) +- Artifact reuse saves 5-10 min per workflow +- Conditional tests skip unnecessary work +- Developers get faster feedback + +### 4. 
Cost Optimization +**Problem Solved**: Expensive cloud runner costs +**Our Solution**: Strategic runner selection and job optimization + +- Linux runners for most work ($0.008/min) +- macOS runners only when necessary ($0.08-$0.16/min) +- Native builds deferred to post-merge +- Estimated savings: $500+/month + +### 5. Flexibility & Scalability +**Problem Solved**: Difficulty adding new tests or deployment targets +**Our Solution**: Configuration-driven test matrix and modular deployment + +- New test suites added via YAML config (no workflow changes) +- Matrix strategy automatically parallelizes new tests +- New deployment targets as reusable actions +- Scales from 10 to 100+ test suites + +### 6. Security & Isolation +**Problem Solved**: Secret exposure in PR workflows +**Our Solution**: Separate PR workflow from post-workflow reporting + +- PR workflows run on untrusted code (no secrets) +- Post-workflow-reporting runs separately with secrets +- Environment-based secret scoping for releases +- Principle of least privilege + +### 7. Developer Experience +**Problem Solved**: Confusing workflow failures and difficult debugging +**Our Solution**: Clear phase separation, detailed reporting, troubleshooting docs + +- Named phases show exactly where failures occur +- Comprehensive test reports with links +- Slack notifications with failure details +- Documentation for common issues + +### 8. Trunk Health Monitoring +**Problem Solved**: Accumulated changes making trunk breakage hard to debug +**Our Solution**: Nightly validation as early warning system + +- Catches trunk breakage within 24 hours +- Prevents issue accumulation (easier debugging) +- Maintains releasable main branch at all times +- Enabled dramatic increase in release frequency +- **Note**: Legacy workflow, not part of release promotion flow + +### Real-World Impact + +| Metric | Before | After | Improvement | +|--------|--------|-------|-------------| +| **PR Check Time** | ~45 min | ~15-25 min | 40-50% faster | +| **Workflow Duplication** | ~2000 lines | ~600 lines | 70% reduction | +| **macOS Runner Costs** | ~$800/mo | ~$300/mo | 62% savings | +| **Test Coverage** | 15 suites | 23 suites | 53% more tests | +| **Deployment Time** | Manual, 2+ hours | Automated, 20 min | 83% faster | +| **Failed PR Debug Time** | ~30 min avg | ~10 min avg | 67% faster | +| **Trunk Breakage Detection** | Days/weeks | <24 hours | Early warning system | +| **Release Frequency** | Lower cadence | Significantly increased | Clean trunk enabler | + +## Related Documentation + +### Internal Documentation +- **[README.md](.github/workflows/README.md)** - Overview and getting started guide +- **[maven-release-process.md](.github/workflows/maven-release-process.md)** - Detailed release walkthrough with UI screenshots +- **[test-matrix.yml](.github/test-matrix.yml)** - Test suite configuration reference +- **[filters.yaml](.github/filters.yaml)** - Path-based change detection rules + +### GitHub Actions Documentation +- [GitHub Actions Documentation](https://docs.github.com/en/actions) +- [Workflow Syntax Reference](https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions) +- [Reusable Workflows Guide](https://docs.github.com/en/actions/using-workflows/reusing-workflows) +- [Matrix Strategy Guide](https://docs.github.com/en/actions/using-jobs/using-a-matrix-for-your-jobs) +- [Composite Actions Guide](https://docs.github.com/en/actions/creating-actions/creating-a-composite-action) + +### External Tools +- [Semgrep 
Documentation](https://semgrep.dev/docs/) +- [Docker Buildx Documentation](https://docs.docker.com/buildx/working-with-buildx/) +- [Maven Documentation](https://maven.apache.org/guides/) + +--- + +**Last Updated**: December 2024 +**Maintained By**: dotCMS DevOps Team +**Questions?** Contact #guild-dev-pipeline on Slack + diff --git a/.github/workflows/cicd_1-pr.yml b/.github/workflows/cicd_1-pr.yml index cadf48c8f3af..ef08b68ce454 100644 --- a/.github/workflows/cicd_1-pr.yml +++ b/.github/workflows/cicd_1-pr.yml @@ -70,7 +70,7 @@ jobs: karate: ${{ needs.initialize.outputs.backend == 'true' }} frontend: ${{ needs.initialize.outputs.frontend == 'true' }} cli: ${{ needs.initialize.outputs.cli == 'true' }} - e2e: ${{ needs.initialize.outputs.build == 'true' }} + e2e: false secrets: DOTCMS_LICENSE: ${{ secrets.DOTCMS_LICENSE }} diff --git a/.github/workflows/cicd_2-merge-queue.yml b/.github/workflows/cicd_2-merge-queue.yml index 1542418ccfe8..6867bc21cf70 100644 --- a/.github/workflows/cicd_2-merge-queue.yml +++ b/.github/workflows/cicd_2-merge-queue.yml @@ -27,7 +27,7 @@ jobs: karate: ${{ needs.initialize.outputs.backend == 'true' }} frontend: ${{ needs.initialize.outputs.frontend == 'true' }} cli: ${{ needs.initialize.outputs.cli == 'true' }} - e2e: ${{ needs.initialize.outputs.build == 'true' }} + e2e: false secrets: DOTCMS_LICENSE: ${{ secrets.DOTCMS_LICENSE }} finalize: diff --git a/.github/workflows/cicd_3-trunk.yml b/.github/workflows/cicd_3-trunk.yml index 028fa7416211..60e742e255fb 100644 --- a/.github/workflows/cicd_3-trunk.yml +++ b/.github/workflows/cicd_3-trunk.yml @@ -70,6 +70,7 @@ jobs: with: run-all-tests: ${{ inputs.run-all-tests || false }} artifact-run-id: ${{ needs.initialize.outputs.artifact-run-id }} + e2e: false secrets: DOTCMS_LICENSE: ${{ secrets.DOTCMS_LICENSE }} permissions: diff --git a/.github/workflows/cicd_4-nightly.yml b/.github/workflows/cicd_4-nightly.yml index abddfd02d462..9f921dccef4c 100644 --- a/.github/workflows/cicd_4-nightly.yml +++ b/.github/workflows/cicd_4-nightly.yml @@ -63,6 +63,7 @@ jobs: with: run-all-tests: ${{ inputs.run-all-tests || true }} artifact-run-id: ${{ needs.initialize.outputs.artifact-run-id }} + e2e: false secrets: DOTCMS_LICENSE: ${{ secrets.DOTCMS_LICENSE }} permissions: diff --git a/.github/workflows/cicd_5-lts.yml b/.github/workflows/cicd_5-lts.yml index e9bcb045573d..0b798ddd4629 100644 --- a/.github/workflows/cicd_5-lts.yml +++ b/.github/workflows/cicd_5-lts.yml @@ -57,7 +57,7 @@ jobs: karate: ${{ needs.initialize.outputs.backend == 'true' }} frontend: ${{ needs.initialize.outputs.frontend == 'true' }} cli: ${{ needs.initialize.outputs.cli == 'true' }} - e2e: ${{ needs.initialize.outputs.build == 'true' }} + e2e: false secrets: DOTCMS_LICENSE: ${{ secrets.DOTCMS_LICENSE }} diff --git a/.github/workflows/cicd_6-release.yml b/.github/workflows/cicd_6-release.yml new file mode 100644 index 000000000000..0a81dab522d4 --- /dev/null +++ b/.github/workflows/cicd_6-release.yml @@ -0,0 +1,178 @@ +# +# Release Workflow +# +# This workflow handles the complete release process for dotCMS following the established +# phase pattern: initialize -> build -> deployment -> finalize +# +# Key features: +# - Release preparation (branch creation, version setting) +# - Standard build phase for artifact generation +# - Release-specific deployment (Artifactory, Javadocs, plugins) +# - Docker image deployment via standard deployment phase +# - SBOM generation +# - GitHub label management +# - Release notifications +# +# This workflow follows the 
modular phase pattern established in the CICD architecture +# and replaces the legacy-release_maven-release-process.yml workflow +# + +name: '-6 Release Process' + +on: + workflow_dispatch: + inputs: + release_version: + description: 'Release Version (yy.mm.dd-## or yy.mm.dd_lts_v##; ##: counter)' + required: true + release_commit: + description: 'Commit Hash (defaults to latest commit)' + required: false + deploy_artifact: + description: 'Deploy Artifact to Artifactory' + type: boolean + default: true + required: false + update_plugins: + description: 'Update Plugins' + type: boolean + default: true + required: false + upload_javadocs: + description: 'Upload Javadocs to S3' + type: boolean + default: true + required: false + update_github_labels: + description: 'Update GitHub labels' + type: boolean + default: true + required: false + notify_slack: + description: 'Notify Slack' + type: boolean + default: true + required: false + +# Concurrency is keyed per release version with cancel-in-progress disabled - releases should complete without interruption +concurrency: + group: release-${{ github.event.inputs.release_version }} + cancel-in-progress: false + +jobs: + # Initialize - standard initialization phase (always first) + initialize: + name: Initialize + uses: ./.github/workflows/cicd_comp_initialize-phase.yml + with: + validation-level: 'none' + + # Release Prepare - validates version, creates release branch, sets version + release-prepare: + name: Release Prepare + needs: [ initialize ] + uses: ./.github/workflows/cicd_comp_release-prepare-phase.yml + with: + release_version: ${{ github.event.inputs.release_version }} + release_commit: ${{ github.event.inputs.release_commit }} + secrets: + CI_MACHINE_TOKEN: ${{ secrets.CI_MACHINE_TOKEN }} + CI_MACHINE_USER: ${{ secrets.CI_MACHINE_USER }} + + # Build - standard build phase for artifact generation + build: + name: Build + needs: [ release-prepare, initialize ] + if: always() && !failure() && !cancelled() + uses: ./.github/workflows/cicd_comp_build-phase.yml + with: + core-build: true + run-pr-checks: false + ref: ${{ needs.release-prepare.outputs.release_tag }} + validate: false + version: ${{ needs.release-prepare.outputs.release_version }} + generate-docker: true + permissions: + contents: read + packages: write + + # Deployment - standard deployment phase for Docker images and NPM + deployment: + name: Deployment + needs: [ release-prepare, initialize, build ] + if: always() && !failure() && !cancelled() + uses: ./.github/workflows/cicd_comp_deployment-phase.yml + with: + environment: ${{ needs.release-prepare.outputs.release_version }} + artifact-run-id: ${{ github.run_id }} + latest: ${{ needs.release-prepare.outputs.is_latest == 'true' }} + deploy-dev-image: true + reuse-previous-build: false + publish-npm-cli: false + publish-npm-sdk-libs: false + secrets: + DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} + DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }} + EE_REPO_USERNAME: ${{ secrets.EE_REPO_USERNAME }} + EE_REPO_PASSWORD: ${{ secrets.EE_REPO_PASSWORD }} + SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} + DEV_REQUEST_TOKEN: ${{ secrets.DEV_REQUEST_TOKEN }} + + # Release - release-specific operations (Artifactory, Javadocs, Plugins, SBOM, Labels) + # Waits for deployment to complete to safely update labels only if both succeed + release: + name: Release + needs: [ release-prepare, initialize, build, deployment ] + if: always() && !failure() && !cancelled() + uses: ./.github/workflows/cicd_comp_release-phase.yml + with: + release_version: ${{ needs.release-prepare.outputs.release_version
}} + release_tag: ${{ needs.release-prepare.outputs.release_tag }} + artifact_run_id: ${{ github.run_id }} + deploy_artifact: ${{ github.event.inputs.deploy_artifact }} + upload_javadocs: ${{ github.event.inputs.upload_javadocs }} + update_plugins: ${{ github.event.inputs.update_plugins }} + update_github_labels: ${{ github.event.inputs.update_github_labels }} + secrets: + EE_REPO_USERNAME: ${{ secrets.EE_REPO_USERNAME }} + EE_REPO_PASSWORD: ${{ secrets.EE_REPO_PASSWORD }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + CI_MACHINE_TOKEN: ${{ secrets.CI_MACHINE_TOKEN }} + + # Finalize - standard finalization phase (required for phase pattern) + finalize: + name: Finalize + if: always() + needs: [ initialize, build, deployment, release ] + uses: ./.github/workflows/cicd_comp_finalize-phase.yml + with: + artifact-run-id: ${{ github.run_id }} + needsData: ${{ toJson(needs) }} + + # Report - send release notification to Slack + report: + name: Report + runs-on: ubuntu-${{ vars.UBUNTU_RUNNER_VERSION || '24.04' }} + needs: [ release-prepare, deployment, finalize ] + if: always() + steps: + - name: Checkout core + uses: actions/checkout@v4 + with: + ref: main + + - uses: ./.github/actions/core-cicd/cleanup-runner + + - name: Slack Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_WEBHOOK: ${{ secrets.RELEASE_SLACK_WEBHOOK }} + SLACK_USERNAME: dotBot + SLACK_TITLE: "Important news!" + SLACK_MSG_AUTHOR: " " + MSG_MINIMAL: true + SLACK_FOOTER: "" + SLACK_ICON: https://avatars.slack-edge.com/temp/2021-12-08/2830145934625_e4e464d502865ff576e4.png + SLACK_MESSAGE: " This automated script is excited to announce the release of a new version of dotCMS `${{ needs.release-prepare.outputs.release_version }}` :rocket:\n:docker: Produced images: [${{ needs.deployment.outputs.formatted_tags || needs.deployment.outputs.docker_tags }}]" + if: success() && github.event.inputs.notify_slack == 'true' \ No newline at end of file diff --git a/.github/workflows/cicd_comp_cli-native-build-phase.yml b/.github/workflows/cicd_comp_cli-native-build-phase.yml index 25237b6a876a..a8c5a2c36819 100644 --- a/.github/workflows/cicd_comp_cli-native-build-phase.yml +++ b/.github/workflows/cicd_comp_cli-native-build-phase.yml @@ -64,7 +64,7 @@ jobs: id: set-os run: | if [[ "${{ inputs.buildNativeImage }}" == "true" ]]; then - RUNNERS='[{ "os": "ubuntu-${{ vars.UBUNTU_RUNNER_VERSION || '24.04' }}", "label": "Linux", "platform": "linux-x86_64" }, { "os": "macos-13", "label": "macOS-Intel", "platform": "osx-x86_64" }, { "os": "macos-14", "label": "macOS-Silicon", "platform": "osx-aarch_64" }]' + RUNNERS='[{ "os": "ubuntu-${{ vars.UBUNTU_RUNNER_VERSION || '24.04' }}", "label": "Linux", "platform": "linux-x86_64" }, { "os": "macos-${{ vars.MACOS_INTEL_RUNNER_VERSION || '15-intel' }}", "label": "macOS-Intel", "platform": "osx-x86_64" }, { "os": "macos-${{ vars.MACOS_SILICON_RUNNER_VERSION || '14' }}", "label": "macOS-Silicon", "platform": "osx-aarch_64" }]' else RUNNERS='[{ "os": "ubuntu-${{ vars.UBUNTU_RUNNER_VERSION || '24.04' }}", "label": "Linux", "platform": "linux-x86_64" }]' fi diff --git a/.github/workflows/cicd_comp_deployment-phase.yml b/.github/workflows/cicd_comp_deployment-phase.yml index 96ee509b0b02..76ad7ab3fcfb 100644 --- a/.github/workflows/cicd_comp_deployment-phase.yml +++ b/.github/workflows/cicd_comp_deployment-phase.yml @@ -37,7 +37,14 @@ on: type: boolean publish-npm-sdk-libs: default: false - type: boolean + type: boolean + 
outputs: + docker_tags: + description: 'Docker image tags that were built' + value: ${{ jobs.deployment.outputs.docker_tags }} + formatted_tags: + description: 'Formatted Docker tags for notifications' + value: ${{ jobs.deployment.outputs.formatted_tags }} secrets: DOCKER_USERNAME: required: false @@ -67,6 +74,9 @@ jobs: # Use of Docker environments to enable per-deployment environment secrets # This allows for different secrets to be used based on the deployment environment environment: ${{ inputs.environment }} + outputs: + docker_tags: ${{ steps.docker_build.outputs.tags }} + formatted_tags: ${{ steps.format-tags.outputs.formatted_tags }} steps: # Checkout the repository - uses: actions/checkout@v4 @@ -108,6 +118,21 @@ jobs: DOTCMS_DOCKER_TAG=${{ inputs.environment }} SDKMAN_JAVA_VERSION=${{ steps.get-sdkman-version.outputs.SDKMAN_JAVA_VERSION }} + # Format tags for Slack notifications + - name: Format Tags + id: format-tags + run: | + tags='' + tags_arr=( ${{ steps.docker_build.outputs.tags }} ) + + for tag in "${tags_arr[@]}" + do + [[ -n "${tags}" ]] && tags="${tags}, " + tags="${tags}\`${tag}\`" + done + + echo "formatted_tags=${tags}" >> $GITHUB_OUTPUT + # Build and push the dev Docker image (if required) - name: Build/Push Docker Dev Image id: docker_build_dev diff --git a/.github/workflows/cicd_comp_release-phase.yml b/.github/workflows/cicd_comp_release-phase.yml new file mode 100644 index 000000000000..7770a7d42062 --- /dev/null +++ b/.github/workflows/cicd_comp_release-phase.yml @@ -0,0 +1,216 @@ +# Release Phase Workflow +# +# This reusable workflow handles release-specific finalization operations: +# - Deploying artifacts to Artifactory (Maven repository) +# - Generating and uploading Javadocs to S3 +# - Triggering plugin repository updates +# - Generating and uploading SBOM (Software Bill of Materials) +# - Updating GitHub issue labels for release tracking +# +# This phase runs after the standard deployment phase (which handles Docker/NPM) +# and focuses on release-specific operations. 
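+#
+# For illustration only, a minimal caller sketch (it mirrors how cicd_6-release.yml
+# above invokes this reusable workflow; the version values are placeholders):
+#
+#   release:
+#     uses: ./.github/workflows/cicd_comp_release-phase.yml
+#     with:
+#       release_version: '24.12.31-01'
+#       release_tag: 'v24.12.31-01'
+#     secrets:
+#       CI_MACHINE_TOKEN: ${{ secrets.CI_MACHINE_TOKEN }}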
+# +# Key features: +# - Configurable release operations (artifacts, javadocs, plugins, labels) +# - SBOM generation and GitHub release asset upload +# - GitHub issue label management for release tracking +# - AWS S3 integration for javadoc hosting + +name: Release Phase + +on: + workflow_call: + inputs: + release_version: + description: 'Release version' + required: true + type: string + release_tag: + description: 'Release tag' + required: true + type: string + artifact_run_id: + description: 'Artifact run ID' + required: false + type: string + default: ${{ github.run_id }} + deploy_artifact: + description: 'Deploy artifact to Artifactory' + type: boolean + default: true + upload_javadocs: + description: 'Upload Javadocs to S3' + type: boolean + default: true + update_plugins: + description: 'Update Plugins' + type: boolean + default: true + update_github_labels: + description: 'Update GitHub labels' + type: boolean + default: true + secrets: + EE_REPO_USERNAME: + required: false + description: 'Artifactory username' + EE_REPO_PASSWORD: + required: false + description: 'Artifactory password' + AWS_ACCESS_KEY_ID: + required: false + description: 'AWS access key ID' + AWS_SECRET_ACCESS_KEY: + required: false + description: 'AWS secret access key' + CI_MACHINE_TOKEN: + required: false + description: 'CI machine token for GitHub API operations' + +jobs: + # Deploy release artifacts to Artifactory and S3 + release-artifacts: + name: Release Artifacts + runs-on: ubuntu-${{ vars.UBUNTU_RUNNER_VERSION || '24.04' }} + env: + AWS_REGION: us-east-1 + JVM_TEST_MAVEN_OPTS: '-e -B -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn' + steps: + - name: Checkout core + uses: actions/checkout@v4 + with: + ref: ${{ inputs.release_tag }} + + - uses: ./.github/actions/core-cicd/cleanup-runner + + - name: Setup Java + id: setup-java + uses: ./.github/actions/core-cicd/setup-java + + - name: Restore Maven Repository + uses: actions/download-artifact@v4 + with: + name: maven-repo + path: ~/.m2/repository + + - name: Configure Maven Settings + uses: whelk-io/maven-settings-xml-action@v20 + with: + servers: '[{ "id": "dotcms-libs-local", "username": "${{ secrets.EE_REPO_USERNAME }}", "password": "${{ secrets.EE_REPO_PASSWORD }}" }]' + + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ env.AWS_REGION }} + if: inputs.upload_javadocs == true + + - name: Deploy Release Artifacts to Artifactory + run: | + ./mvnw -ntp \ + "${JVM_TEST_MAVEN_OPTS}" \ + -Dprod=true \ + -DskipTests=true \ + deploy + if: inputs.deploy_artifact == true + + - name: Generate and Upload Javadocs + run: | + ./mvnw -ntp \ + "${JVM_TEST_MAVEN_OPTS}" \ + javadoc:javadoc \ + -pl :dotcms-core + rc=$? 
+ if [[ $rc != 0 ]]; then + echo "Javadoc generation failed with exit code $rc" + exit $rc + fi + + site_dir=./dotCMS/target/site + javadoc_dir=${site_dir}/javadocs + s3_uri=s3://static.dotcms.com/docs/${{ inputs.release_version }}/javadocs + + mv ${site_dir}/apidocs ${javadoc_dir} + echo "Running: aws s3 cp ${javadoc_dir} ${s3_uri} --recursive" + aws s3 cp ${javadoc_dir} ${s3_uri} --recursive + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + if: inputs.upload_javadocs == true + + - name: Trigger Plugin Repository Update + env: + RELEASE_VERSION: ${{ inputs.release_version }} + CI_MACHINE_TOKEN: ${{ secrets.CI_MACHINE_TOKEN }} + run: | + # shellcheck disable=SC2153 + release_version="${RELEASE_VERSION}" + response=$(curl -L \ + -X POST \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer ${CI_MACHINE_TOKEN}" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + https://api.github.com/repos/dotCMS/plugin-seeds/dispatches \ + -d "{\"event_type\": \"on-plugins-release\", \"client_payload\": {\"release_version\": \"${release_version}\"}}" \ + -w "\n%{http_code}" \ + -s) + http_code=$(echo "$response" | tail -n1) + if [ "${http_code}" != "204" ]; then + echo "Failed to dispatch workflow. HTTP code: $http_code" + echo "Response: $response" + fi + if: inputs.update_plugins == true + + # Generate and upload SBOM to GitHub release + release-sbom: + name: Release SBOM + runs-on: ubuntu-${{ vars.UBUNTU_RUNNER_VERSION || '24.04' }} + continue-on-error: true + steps: + - uses: actions/checkout@v4 + + - uses: ./.github/actions/legacy-release/sbom-generator + id: sbom-generator + with: + dotcms_version: ${{ inputs.release_version }} + github_token: ${{ secrets.CI_MACHINE_TOKEN }} + + - name: Download SBOM Artifacts + uses: actions/download-artifact@v4 + with: + path: ${{ github.workspace }}/artifacts + pattern: ${{ steps.sbom-generator.outputs.sbom-artifact }} + + - name: Upload SBOM to GitHub Release + env: + GITHUB_TOKEN: ${{ secrets.CI_MACHINE_TOKEN }} + run: | + echo "::group::Upload SBOM Asset" + ARTIFACT_NAME=${{ steps.sbom-generator.outputs.sbom-artifact }} + SBOM="./artifacts/${ARTIFACT_NAME}/${ARTIFACT_NAME}.json" + + if [ -f "${SBOM}" ]; then + echo "SBOM: ${SBOM}" + cat "${SBOM}" + + zip "${ARTIFACT_NAME}.zip" "${SBOM}" + gh release upload "${{ inputs.release_tag }}" "${ARTIFACT_NAME}.zip" + else + echo "SBOM artifact not found." + fi + echo "::endgroup::" + + # Update GitHub labels for release tracking + # Only updates labels if release-artifacts (Artifactory/Javadocs) succeeded. + # The calling workflow's dependency chain ensures deployment also succeeded. 
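+  # For example (placeholder version): issues labeled 'Next Release' would be
+  # relabeled 'Release : 24.12.31-01' by the reusable labeling workflow invoked below.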
+ release-labeling: + name: Release Labeling + needs: [ release-artifacts ] + if: success() && inputs.update_github_labels == true + uses: ./.github/workflows/issue_comp_release-labeling.yml + with: + new_label: 'Release : ${{ inputs.release_version }}' + rename_label: 'Next Release' + secrets: + CI_MACHINE_TOKEN: ${{ secrets.CI_MACHINE_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/cicd_comp_release-prepare-phase.yml b/.github/workflows/cicd_comp_release-prepare-phase.yml new file mode 100644 index 000000000000..320d83552024 --- /dev/null +++ b/.github/workflows/cicd_comp_release-prepare-phase.yml @@ -0,0 +1,182 @@ +# Release Prepare Phase Workflow +# +# This reusable workflow is responsible for preparing a release by: +# - Validating release version format +# - Creating release branch +# - Setting release version in maven.config +# - Updating LICENSE file Change Date +# - Creating initial GitHub release +# - Caching build artifacts for subsequent phases +# +# Key features: +# - Version validation (standard and LTS formats) +# - Automatic branch and tag management +# - Maven configuration setup for production builds +# - Build artifact caching for reuse +# - GitHub release creation + +name: Release Prepare Phase + +on: + workflow_call: + inputs: + release_version: + description: 'Release Version (yy.mm.dd-## or yy.mm.dd_lts_v##)' + required: true + type: string + release_commit: + description: 'Commit Hash (default to latest commit)' + required: false + type: string + default: '' + secrets: + CI_MACHINE_TOKEN: + required: false + description: 'CI machine token for GitHub operations (defaults to GITHUB_TOKEN)' + CI_MACHINE_USER: + required: false + description: 'CI machine user for git commits (defaults to github-actions[bot])' + outputs: + release_version: + value: ${{ jobs.prepare.outputs.release_version }} + release_tag: + value: ${{ jobs.prepare.outputs.release_tag }} + release_branch: + value: ${{ jobs.prepare.outputs.release_branch }} + release_commit: + value: ${{ jobs.prepare.outputs.release_commit }} + release_hash: + value: ${{ jobs.prepare.outputs.release_hash }} + is_lts: + value: ${{ jobs.prepare.outputs.is_lts }} + is_latest: + value: ${{ jobs.prepare.outputs.is_latest }} + date: + value: ${{ jobs.prepare.outputs.date }} + +jobs: + prepare: + name: Prepare Release + runs-on: ubuntu-${{ vars.UBUNTU_RUNNER_VERSION || '24.04' }} + outputs: + release_version: ${{ steps.set-version.outputs.release_version }} + release_tag: ${{ steps.set-version.outputs.release_tag }} + release_branch: ${{ steps.set-version.outputs.release_branch }} + release_commit: ${{ steps.set-version.outputs.release_commit }} + release_hash: ${{ steps.set-version.outputs.release_hash }} + is_lts: ${{ steps.set-version.outputs.is_lts }} + is_latest: ${{ steps.set-version.outputs.is_latest }} + date: ${{ steps.set-version.outputs.date }} + steps: + - name: Validate Release Version Format + env: + RELEASE_VERSION: ${{ inputs.release_version }} + run: | + # shellcheck disable=SC2153 + release_version="${RELEASE_VERSION}" + if [[ ! 
${release_version} =~ ^[0-9]{2}\.[0-9]{2}\.[0-9]{2}(-[0-9]{1,2}|_lts_v[0-9]{1,2})$ ]]; then + echo 'Release version must be in the format yy.mm.dd-## or yy.mm.dd_lts_v##' + exit 1 + fi + + - run: echo 'GitHub context' + env: + GITHUB_CONTEXT: ${{ toJson(github) }} + + - name: Checkout core + uses: actions/checkout@v4 + with: + fetch-depth: 0 + token: ${{ secrets.CI_MACHINE_TOKEN || github.token }} + + - uses: ./.github/actions/core-cicd/cleanup-runner + + - name: Set Version Variables + id: set-version + env: + RELEASE_VERSION: ${{ inputs.release_version }} + RELEASE_COMMIT: ${{ inputs.release_commit }} + CI_MACHINE_USER: ${{ secrets.CI_MACHINE_USER || 'github-actions[bot]' }} + run: | + git config user.name "${CI_MACHINE_USER}" + git config user.email "${CI_MACHINE_USER}@users.noreply.github.com" + + # shellcheck disable=SC2153 + release_version="${RELEASE_VERSION}" + release_branch="release-${release_version}" + release_tag="v${release_version}" + # shellcheck disable=SC2153 + release_commit="${RELEASE_COMMIT}" + if [[ -z "${release_commit}" ]]; then + release_commit=$(git log -1 --pretty=%H) + fi + release_hash=${release_commit::7} + is_lts=false + is_latest=false + [[ ${release_version} =~ ^[0-9]{2}\.[0-9]{2}\.[0-9]{2}_lts_v[0-9]{1,2}$ ]] && is_lts=true + [[ ${release_version} =~ ^[0-9]{2}\.[0-9]{2}\.[0-9]{2}-[0-9]{1,2}$ ]] && is_latest=true + + { + echo "release_version=${release_version}" + echo "release_branch=${release_branch}" + echo "release_tag=${release_tag}" + echo "release_commit=${release_commit}" + echo "release_hash=${release_hash}" + echo "is_lts=${is_lts}" + echo "is_latest=${is_latest}" + echo "date=$(/bin/date -u "+%Y-%m")" + } >> "$GITHUB_OUTPUT" + + - name: Create Release Branch and Tag + id: create-branch + run: | + release_tag=${{ steps.set-version.outputs.release_tag }} + if git rev-parse "${release_tag}" >/dev/null 2>&1; then + echo "Tag ${release_tag} exists, removing it" + git push origin :refs/tags/${release_tag} + fi + + git reset --hard ${{ steps.set-version.outputs.release_commit }} + release_version=${{ steps.set-version.outputs.release_version }} + release_branch=${{ steps.set-version.outputs.release_branch }} + + remote=$(git ls-remote --heads https://github.com/dotCMS/core.git "${release_branch}" | wc -l | tr -d '[:space:]') + if [[ "${remote}" == '1' ]]; then + echo "Release branch ${release_branch} already exists, removing it" + git push origin :${release_branch} + fi + git checkout -b ${release_branch} + + # set version in .mvn/maven.config + echo "-Dprod=true" > .mvn/maven.config + echo "-Drevision=${release_version}" >> .mvn/maven.config + echo "-Dchangelist=" >> .mvn/maven.config + + git add .mvn/maven.config + + # Update LICENSE file Change Date + chmod +x .github/actions/update-license-date.sh + .github/actions/update-license-date.sh + + # Add LICENSE file if it was modified + if !
git diff --quiet HEAD -- LICENSE; then + echo "LICENSE file was updated, adding to commit" + git add LICENSE + fi + + git status + git commit -a -m "🏁 Publishing release version [${release_version}]" + git push origin ${release_branch} + + release_commit=$(git log -1 --pretty=%H) + echo "release_commit=${release_commit}" >> "$GITHUB_OUTPUT" + + - name: Create GitHub Release + run: | + curl -X POST \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + -H "Authorization: Bearer ${{ secrets.CI_MACHINE_TOKEN || github.token }}" \ + https://api.github.com/repos/${{ github.repository }}/releases \ + -d '{"tag_name": "${{ steps.set-version.outputs.release_tag }}", "name": "Release ${{ steps.set-version.outputs.release_version }}", "target_commitish": "${{ steps.create-branch.outputs.release_commit }}", "draft": false, "prerelease": false, "generate_release_notes": false}' + if: success() \ No newline at end of file diff --git a/.github/workflows/cicd_comp_test-phase.yml b/.github/workflows/cicd_comp_test-phase.yml index 3111d8cf0f1a..69812e9fd1ca 100644 --- a/.github/workflows/cicd_comp_test-phase.yml +++ b/.github/workflows/cicd_comp_test-phase.yml @@ -102,7 +102,9 @@ jobs: // Process each test type for (const [testType, testConfig] of Object.entries(config.test_types)) { - const shouldRun = inputs['run-all-tests'] || inputs[testConfig.condition_input]; + // Check if explicitly disabled (false) - this overrides run-all-tests + const isExplicitlyDisabled = inputs[testConfig.condition_input] === false; + const shouldRun = !isExplicitlyDisabled && (inputs['run-all-tests'] || inputs[testConfig.condition_input]); if (!shouldRun) { console.log(`Skipping ${testType} tests - not enabled`); diff --git a/.github/workflows/legacy-release_comp_maven-build-docker-image.yml b/.github/workflows/legacy-release_comp_maven-build-docker-image.yml index 4a9fe0845602..b021e453e456 100644 --- a/.github/workflows/legacy-release_comp_maven-build-docker-image.yml +++ b/.github/workflows/legacy-release_comp_maven-build-docker-image.yml @@ -25,6 +25,10 @@ on: required: false type: boolean default: false + custom_tag: + description: 'Custom Docker Image Tag' + required: false + type: string secrets: docker_io_username: description: 'Docker.io username' @@ -248,6 +252,7 @@ jobs: type=raw,value=${{ steps.set-common-vars.outputs.version }},enable=${{ steps.set-common-vars.outputs.is_release }} type=raw,value=latest,enable=${{ steps.set-common-vars.outputs.is_latest }} type=raw,value={{sha}},enable=${{ steps.set-common-vars.outputs.is_custom }} + type=raw,value=${{ inputs.custom_tag }},enable=${{ inputs.custom_tag != '' }} if: success() - name: Debug Docker Metadata diff --git a/.github/workflows/legacy-release_publish-dotcms-docker-image.yml b/.github/workflows/legacy-release_publish-dotcms-docker-image.yml index ddeea10d60f3..2eb475603012 100644 --- a/.github/workflows/legacy-release_publish-dotcms-docker-image.yml +++ b/.github/workflows/legacy-release_publish-dotcms-docker-image.yml @@ -16,6 +16,10 @@ on: - 'GHCR.IO' - 'BOTH' default: 'DOCKER.IO' + custom_tag: + description: 'Custom Docker Image Tag' + required: false + type: string jobs: prepare-build: name: Prepare build @@ -45,6 +49,7 @@ jobs: ref: ${{ needs.prepare-build.outputs.ref }} docker_platforms: ${{ needs.prepare-build.outputs.docker_platforms }} docker_registry: ${{ inputs.docker_registry }} + custom_tag: ${{ inputs.custom_tag }} secrets: docker_io_username: ${{ secrets.DOCKER_USERNAME }} docker_io_token: ${{ 
secrets.DOCKER_TOKEN }} diff --git a/.gitignore b/.gitignore index 1479aaa8a0c0..47af9ca53d7f 100644 --- a/.gitignore +++ b/.gitignore @@ -193,4 +193,17 @@ examples/astro/package-lock.json local/ -**/.yalc/ \ No newline at end of file +**/.yalc/ + +# Claude Code diagnostic outputs +.claude/diagnostics/ + +# Python cache files +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +*.egg-info/ +dist/ +*.egg diff --git a/.mise.md b/.mise.md new file mode 100644 index 000000000000..316f33c41b53 --- /dev/null +++ b/.mise.md @@ -0,0 +1,154 @@ +# Mise Configuration for dotCMS + +This repository uses [mise](https://mise.jdx.dev/) for managing development tool versions, including GitHub CLI and Python. + +## What is Mise? + +Mise (formerly rtx) is a polyglot tool version manager. It automatically installs and manages versions of tools like Python, Node.js, GitHub CLI, and many others. + +## Quick Start + +### 1. Install Mise + +```bash +# macOS +brew install mise + +# Or using the official installer +curl https://mise.run | sh +``` + +### 2. Activate Mise in Your Shell + +Add to your `~/.zshrc` (for zsh) or `~/.bashrc` (for bash): + +```bash +eval "$(mise activate zsh)" # for zsh +eval "$(mise activate bash)" # for bash +``` + +Then reload your shell: + +```bash +source ~/.zshrc # or source ~/.bashrc +# Or restart your terminal +``` + +### 3. Install Tools + +Navigate to the repository and mise will automatically install configured tools: + +```bash +cd /path/to/dotcms/core +mise install +``` + +Or let mise auto-install when you enter the directory: + +```bash +cd /path/to/dotcms/core +# Tools will install automatically if auto_install is enabled +``` + +## Configured Tools + +The `.mise.toml` file configures these tools: + +- **gh (GitHub CLI)** - `latest` version + - Used for issue and PR management + - Commands: `gh issue`, `gh pr`, etc. + +- **python** - `3.11.x` (latest 3.11) + - Used for cicd-diagnostics skill + - Automatically creates virtual environment in `.venv/` + +## Usage + +### Verify Installation + +```bash +mise doctor +``` + +### List Installed Tools + +```bash +mise list +``` + +### Check Current Tool Versions + +```bash +gh --version +python --version +``` + +### Python Virtual Environment + +Mise automatically creates a Python virtual environment in `.venv/`: + +```bash +# Activate venv (if needed manually) +source .venv/bin/activate + +# Install cicd-diagnostics dependencies +pip install -r .claude/skills/cicd-diagnostics/requirements.txt + +# Deactivate venv +deactivate +``` + +## Benefits of Using Mise + +1. **Consistent versions** - Everyone uses the same tool versions +2. **Automatic installation** - Tools install when entering the directory +3. **Per-project configuration** - Each project can have different versions +4. **Virtual environment management** - Automatic Python venv creation +5. 
**No PATH pollution** - Tools only available in project directory + +## Troubleshooting + +### Tools not found + +If `gh` or `python` commands still point to system versions: + +```bash +# Check if mise is activated +mise doctor + +# If not, activate mise +eval "$(mise activate $(basename $SHELL))" + +# Or add to your shell rc file permanently +``` + +### Force reinstall tools + +```bash +mise install --force +``` + +### Clear mise cache + +```bash +mise cache clear +``` + +## Related Documentation + +- [Mise Documentation](https://mise.jdx.dev/) +- [GitHub CLI Documentation](https://cli.github.com/manual/) +- [Python Documentation](https://docs.python.org/3.11/) + +## CI/CD Diagnostics Skill + +The Python installation is primarily for the cicd-diagnostics skill: + +```bash +# All scripts use the mise-managed Python +.claude/skills/cicd-diagnostics/fetch-logs.py +.claude/skills/cicd-diagnostics/fetch-jobs.py +.claude/skills/cicd-diagnostics/fetch-metadata.py +``` + +See `.claude/skills/cicd-diagnostics/README.md` for skill documentation. diff --git a/.mise.toml b/.mise.toml new file mode 100644 index 000000000000..47c720c7e28b --- /dev/null +++ b/.mise.toml @@ -0,0 +1,31 @@ +# Mise configuration for dotCMS development +# https://mise.jdx.dev/ +# +# To activate mise in your shell, add to your ~/.zshrc or ~/.bashrc: +# eval "$(mise activate zsh)" # for zsh +# eval "$(mise activate bash)" # for bash +# +# Or run in current shell: +# eval "$(mise activate $(basename $SHELL))" +# +# Then reload: source ~/.zshrc (or restart terminal) +# +# Verify installation: mise doctor + +[tools] +# GitHub CLI for issue/PR management +gh = "latest" + +# Python for cicd-diagnostics skill and automation scripts +python = "3.11" + +[env] +# Python virtual environment location +_.python.venv = { path = ".venv", create = true } + +[settings] +# Experimental features +experimental = true + +# Automatically install missing tools +auto_install = true diff --git a/CLAUDE.md b/CLAUDE.md index 420798bc7dfd..d0d40394350c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -452,13 +452,15 @@ LocalTransaction.wrapReturn(() -> { }); ``` -### Angular Development (core-web/) -```typescript -// Angular (REQUIRED modern syntax) -@if (condition()) { } // NOT *ngIf -data = input(); // NOT @Input() -spectator.setInput('prop', value); // Testing CRITICAL -``` +### Frontend Development (core-web/) +**For Angular/TypeScript/Nx development, see [core-web/CLAUDE.md](core-web/CLAUDE.md)** + +The core-web directory contains: +- Angular 19+ applications and libraries +- Modern component patterns with signals +- Jest/Spectator testing standards +- PrimeNG UI components +- Nx monorepo commands ```bash # Test Commands (fastest - no core rebuild needed!) 
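# Illustrative sketch only - representative commands, documented in core-web/CLAUDE.md later in this diff:
yarn run test:dotcms              # run all frontend tests
nx test dotcms-ui --coverage      # one project with coverage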
@@ -496,12 +498,12 @@ cd core-web && nx run dotcms-ui:serve # Separate Frontend dev se ### Tech Stack - **Backend**: Java 21 runtime, Java 11 syntax (core), Maven, Spring/CDI -- **Frontend**: Angular 18.2.3, PrimeNG 17.18.11, NgRx Signals, Jest + Spectator +- **Frontend**: See [core-web/CLAUDE.md](core-web/CLAUDE.md) for Angular/TypeScript stack details - **Infrastructure**: Docker, PostgreSQL, Elasticsearch, GitHub Actions ### Critical Rules - **Maven versions**: Add to `bom/application/pom.xml` ONLY, never `dotCMS/pom.xml` -- **Testing**: ALWAYS use `data-testid` and `spectator.setInput()` +- **Frontend testing**: See [core-web/CLAUDE.md](core-web/CLAUDE.md) for Angular testing standards - **Security**: No hardcoded secrets, validate input, use Logger not System.out ## 📚 Documentation Navigation (Load On-Demand) @@ -635,6 +637,14 @@ Valid log levels: `TRACE`, `DEBUG`, `INFO`, `WARN`, `ERROR`, `FATAL`, `OFF` # List issues gh issue list --assignee @me ``` +- **Issue Templates**: Available templates in `.github/ISSUE_TEMPLATE/`: + - `task.yaml` - Technical tasks or improvements + - `defect.yaml` - Bug reports and defects + - `feature.yaml` - New features and enhancements + - `spike.yaml` - Research and exploration tasks + - `epic.yml` - Large initiatives spanning multiple issues + - `pillar.yml` - Strategic themes + - `ux.yaml` - UX improvements and design tasks - **Conventional Commits**: Use conventional commit format for all changes: ``` feat: add new workflow component @@ -765,11 +775,12 @@ try { ``` ## Summary Checklist + +### Backend Development (Java/Maven) - ✅ Use `Config.getProperty()` and `Logger.info(this, ...)` - ✅ Use `APILocator.getXXXAPI()` for services - ✅ Use `@Value.Immutable` for data objects - ✅ Use JAX-RS `@Path` for REST endpoints - **See [REST API Guide](dotCMS/src/main/java/com/dotcms/rest/CLAUDE.md)** -- ✅ Use `data-testid` for Angular testing - ✅ Use modern Java 21 syntax (Java 11 compatible) - ✅ Follow domain-driven package organization for new features - ✅ **@Schema Rules**: Match schema to actual return type (wrapped vs unwrapped) - **See [REST Guide](dotCMS/src/main/java/com/dotcms/rest/CLAUDE.md)** @@ -779,3 +790,6 @@ try { - ❌ **NEVER use `ResponseEntityView.class`** in `@Schema` - provides no meaningful API documentation - ❌ **NEVER omit `@Schema`** from @ApiResponse(200) - incomplete Swagger documentation - ❌ **NEVER use `@PathParam`** without corresponding @Path placeholder - use @QueryParam instead + +### Frontend Development (Angular/TypeScript) +- ✅ See **[core-web/CLAUDE.md](core-web/CLAUDE.md)** for complete Angular/TypeScript standards and modern syntax diff --git a/bom/application/pom.xml b/bom/application/pom.xml index 1005bca2b4ba..eaeeda916d95 100644 --- a/bom/application/pom.xml +++ b/bom/application/pom.xml @@ -1613,6 +1613,13 @@ 2.6 + + com.tngtech.archunit + archunit-junit5 + 1.4.1 + test + + org.wiremock wiremock diff --git a/core-web/AGENTS.md b/core-web/AGENTS.md new file mode 100644 index 000000000000..5cf3ccfda731 --- /dev/null +++ b/core-web/AGENTS.md @@ -0,0 +1,13 @@ + + + +# General Guidelines for working with Nx + +- When running tasks (for example build, lint, test, e2e, etc.), always prefer running the task through `nx` (i.e. 
`nx run`, `nx run-many`, `nx affected`) instead of using the underlying tooling directly +- You have access to the Nx MCP server and its tools, use them to help the user +- When answering questions about the repository, use the `nx_workspace` tool first to gain an understanding of the workspace architecture where applicable. +- When working in individual projects, use the `nx_project_details` mcp tool to analyze and understand the specific project structure and dependencies +- For questions around nx configuration, best practices or if you're unsure, use the `nx_docs` tool to get relevant, up-to-date docs. Always use this instead of assuming things about nx configuration +- If the user needs help with an Nx configuration or project graph error, use the `nx_workspace` tool to get any errors + + diff --git a/core-web/CLAUDE.md b/core-web/CLAUDE.md index 95bb7ac160f6..738cda575d1b 100644 --- a/core-web/CLAUDE.md +++ b/core-web/CLAUDE.md @@ -9,6 +9,7 @@ This is the **DotCMS Core-Web** monorepo - the frontend infrastructure for the D ## Key Development Commands ### Development Server + ```bash # Start main admin UI with backend proxy nx serve dotcms-ui @@ -21,6 +22,7 @@ nx serve dotcms-ui --configuration=development ``` ### Building + ```bash # Build main application nx build dotcms-ui @@ -35,6 +37,7 @@ nx affected:build ``` ### Testing + ```bash # Run all tests yarn run test:dotcms @@ -55,6 +58,7 @@ nx test dotcms-ui --coverage ``` ### Code Quality + ```bash # Lint all projects yarn run lint:dotcms @@ -71,6 +75,7 @@ nx affected:lint ``` ### Monorepo Management + ```bash # Visualize project dependencies nx dep-graph @@ -85,41 +90,66 @@ nx run-many --target=test --projects=sdk-client,sdk-react ## Architecture & Structure ### Monorepo Organization -- **apps/** - Main applications (dotcms-ui, dotcms-block-editor, dotcms-binary-field-builder, mcp-server) -- **libs/sdk/** - External-facing SDKs (client, react, angular, analytics, experiments, uve) -- **libs/data-access/** - Angular services for API communication -- **libs/ui/** - Shared UI components and patterns -- **libs/portlets/** - Feature-specific portlets (analytics, experiments, locales, etc.) -- **libs/dotcms-models/** - TypeScript interfaces and types -- **libs/block-editor/** - TipTap-based rich text editor -- **libs/template-builder/** - Template construction utilities + +- **apps/** - Main applications (dotcms-ui, dotcms-block-editor, dotcms-binary-field-builder, mcp-server) +- **libs/sdk/** - External-facing SDKs (client, react, angular, analytics, experiments, uve) +- **libs/data-access/** - Angular services for API communication +- **libs/ui/** - Shared UI components and patterns +- **libs/portlets/** - Feature-specific portlets (analytics, experiments, locales, etc.) 
+- **libs/dotcms-models/** - TypeScript interfaces and types +- **libs/block-editor/** - TipTap-based rich text editor +- **libs/template-builder/** - Template construction utilities ### Technology Stack -- **Angular 19.2.9** with standalone components -- **Nx 20.5.1** for monorepo management -- **PrimeNG 17.18.11** UI components -- **TipTap 2.14.0** for rich text editing -- **NgRx 19.2.1** for state management -- **Jest 29.7.0** for testing -- **Playwright** for E2E testing -- **Node.js >=v22.15.0** requirement + +- **Angular 19.2.9** with standalone components +- **Nx 20.5.1** for monorepo management +- **PrimeNG 17.18.11** UI components +- **TipTap 2.14.0** for rich text editing +- **NgRx 19.2.1** for state management +- **Jest 29.7.0** for testing +- **Playwright** for E2E testing +- **Node.js >=v22.15.0** requirement ### Component Conventions -- **Prefix**: All Angular components use `dot-` prefix -- **Naming**: Follow Angular style guide with kebab-case -- **Architecture**: Feature modules with lazy loading -- **State**: Component-store pattern with NgRx signals -- **Testing**: Jest unit tests + Playwright E2E + +- **Prefix**: All Angular components use `dot-` prefix +- **Naming**: Follow Angular style guide with kebab-case +- **Architecture**: Feature modules with lazy loading +- **State**: Component-store pattern with NgRx signals +- **Testing**: Jest unit tests + Playwright E2E + +### Modern Angular Syntax (REQUIRED) + +```typescript +// ✅ CORRECT: Modern control flow syntax +@if (condition()) { } // NOT *ngIf +@for (item of items(); track item.id) { } // NOT *ngFor + +// ✅ CORRECT: Modern input/output syntax +data = input(); // NOT @Input() +onChange = output(); // NOT @Output() + +// ✅ CRITICAL: Testing with Spectator +spectator.setInput('prop', value); // ALWAYS use setInput for inputs +spectator.detectChanges(); // Trigger change detection + +// ✅ CORRECT: Use data-testid for selectors + +const button = spectator.query('[data-testid="submit-button"]'); +``` ### Backend Integration -- **Development Proxy**: `proxy-dev.conf.mjs` routes `/api/*` to port 8080 -- **API Services**: Centralized in `libs/data-access` -- **Authentication**: Bearer token-based with `DotcmsConfigService` -- **Content Management**: Full CRUD through `DotHttpService` + +- **Development Proxy**: `proxy-dev.conf.mjs` routes `/api/*` to port 8080 +- **API Services**: Centralized in `libs/data-access` +- **Authentication**: Bearer token-based with `DotcmsConfigService` +- **Content Management**: Full CRUD through `DotHttpService` ## Development Workflows ### Local Development Setup + 1. Ensure Node.js >=v22.15.0 2. Run `yarn install` to install dependencies 3. Run `node prepare.js` to set up Husky git hooks @@ -127,6 +157,7 @@ nx run-many --target=test --projects=sdk-client,sdk-react 5. Run `nx serve dotcms-ui` for frontend development ### Adding New Features + 1. Create feature branch following naming convention 2. Add libraries in `libs/` for reusable code 3. Use existing patterns from similar features @@ -135,58 +166,100 @@ nx run-many --target=test --projects=sdk-client,sdk-react 6. 
Update TypeScript paths in `tsconfig.base.json` if adding new libraries ### SDK Development -- **Client SDK**: Core API client in `libs/sdk/client` -- **React SDK**: React components in `libs/sdk/react` -- **Angular SDK**: Angular services in `libs/sdk/angular` -- **Publishing**: Automated via npm with proper versioning + +- **Client SDK**: Core API client in `libs/sdk/client` +- **React SDK**: React components in `libs/sdk/react` +- **Angular SDK**: Angular services in `libs/sdk/angular` +- **Publishing**: Automated via npm with proper versioning ### Testing Strategy -- **Unit Tests**: Jest with comprehensive mocking utilities -- **E2E Tests**: Playwright for critical user workflows -- **Coverage**: Reports generated to `../../../target/core-web-reports/` -- **Mock Data**: Extensive mock utilities in `libs/utils-testing` + +- **Unit Tests**: Jest with comprehensive mocking utilities +- **E2E Tests**: Playwright for critical user workflows +- **Coverage**: Reports generated to `../../../target/core-web-reports/` +- **Mock Data**: Extensive mock utilities in `libs/utils-testing` ### Build Targets & Configurations -- **Development**: Proxy configuration with source maps -- **Production**: Optimized builds with tree shaking -- **Library**: Rollup/Vite builds for SDK packages -- **Web Components**: Stencil.js compilation for `dotcms-webcomponents` + +- **Development**: Proxy configuration with source maps +- **Production**: Optimized builds with tree shaking +- **Library**: Rollup/Vite builds for SDK packages +- **Web Components**: Stencil.js compilation for `dotcms-webcomponents` ## Important Notes ### TypeScript Configuration -- **Strict Mode**: Enabled across all projects -- **Path Mapping**: Extensive use of `@dotcms/*` barrel exports -- **Types**: Centralized in `libs/dotcms-models` and `libs/sdk/types` + +- **Strict Mode**: Enabled across all projects +- **Path Mapping**: Extensive use of `@dotcms/*` barrel exports +- **Types**: Centralized in `libs/dotcms-models` and `libs/sdk/types` ### State Management -- **NgRx**: Component stores with signals pattern -- **Global Store**: Centralized state in `libs/global-store` -- **Services**: Angular services for data access and business logic + +- **NgRx**: Component stores with signals pattern +- **Global Store**: Centralized state in `libs/global-store` +- **Services**: Angular services for data access and business logic ### Web Components -- **Stencil.js**: Framework-agnostic components in `libs/dotcms-webcomponents` -- **Legacy**: `libs/dotcms-field-elements` (deprecated, use Stencil components) -- **Integration**: Used across Angular, React, and vanilla JS contexts + +- **Stencil.js**: Framework-agnostic components in `libs/dotcms-webcomponents` +- **Legacy**: `libs/dotcms-field-elements` (deprecated, use Stencil components) +- **Integration**: Used across Angular, React, and vanilla JS contexts ### Performance Considerations -- **Lazy Loading**: Feature modules loaded on demand -- **Tree Shaking**: Proper barrel exports for optimal bundles -- **Caching**: Nx task caching for faster builds -- **Affected**: Only build/test changed projects in CI + +- **Lazy Loading**: Feature modules loaded on demand +- **Tree Shaking**: Proper barrel exports for optimal bundles +- **Caching**: Nx task caching for faster builds +- **Affected**: Only build/test changed projects in CI ## Debugging & Troubleshooting ### Common Issues -- **Proxy Errors**: Ensure backend is running on port 8080 -- **Build Failures**: Check TypeScript paths and circular 
dependencies -- **Test Failures**: Verify mock data and async handling -- **Linting**: Follow component naming conventions with `dot-` prefix + +- **Proxy Errors**: Ensure backend is running on port 8080 +- **Build Failures**: Check TypeScript paths and circular dependencies +- **Test Failures**: Verify mock data and async handling +- **Linting**: Follow component naming conventions with `dot-` prefix ### Development Tools -- **Nx Console**: VS Code extension for Nx commands -- **Angular DevTools**: Browser extension for debugging -- **Coverage Reports**: Check `target/core-web-reports/` for test coverage -- **Dependency Graph**: Use `nx dep-graph` to visualize project relationships -This codebase emphasizes consistency, testability, and maintainability through its monorepo architecture and established patterns. \ No newline at end of file +- **Nx Console**: VS Code extension for Nx commands +- **Angular DevTools**: Browser extension for debugging +- **Coverage Reports**: Check `target/core-web-reports/` for test coverage +- **Dependency Graph**: Use `nx dep-graph` to visualize project relationships + +This codebase emphasizes consistency, testability, and maintainability through its monorepo architecture and established patterns. + +## Summary Checklist + +### Angular/TypeScript Development + +- ✅ Use modern control flow: `@if`, `@for` (NOT `*ngIf`, `*ngFor`) +- ✅ Use modern inputs/outputs: `input()`, `output()` (NOT `@Input()`, `@Output()`) +- ✅ Use `data-testid` attributes for all testable elements +- ✅ Use `spectator.setInput()` for testing component inputs +- ✅ Follow `dot-` prefix convention for all components +- ✅ Use standalone components with lazy loading +- ✅ Use NgRx signals for state management +- ❌ Avoid legacy Angular syntax (`*ngIf`, `@Input()`, etc.) +- ❌ Avoid direct DOM queries without `data-testid` +- ❌ Never skip unit tests for new components + +### For Backend/Java Development + +- See **[../CLAUDE.md](../CLAUDE.md)** for Java, Maven, REST API, and Git workflow standards + + + + +# General Guidelines for working with Nx + +- When running tasks (for example build, lint, test, e2e, etc.), always prefer running the task through `nx` (i.e. `nx run`, `nx run-many`, `nx affected`) instead of using the underlying tooling directly +- You have access to the Nx MCP server and its tools, use them to help the user +- When answering questions about the repository, use the `nx_workspace` tool first to gain an understanding of the workspace architecture where applicable. +- When working in individual projects, use the `nx_project_details` mcp tool to analyze and understand the specific project structure and dependencies +- For questions around nx configuration, best practices or if you're unsure, use the `nx_docs` tool to get relevant, up-to-date docs. 
Always use this instead of assuming things about nx configuration +- If the user needs help with an Nx configuration or project graph error, use the `nx_workspace` tool to get any errors + + diff --git a/core-web/apps/dotcdn/project.json b/core-web/apps/dotcdn/project.json index 03265c41aec0..48038c5d1952 100644 --- a/core-web/apps/dotcdn/project.json +++ b/core-web/apps/dotcdn/project.json @@ -4,6 +4,7 @@ "projectType": "application", "sourceRoot": "apps/dotcdn/src", "prefix": "dotcms", + "tags": [], "targets": { "build": { "executor": "@angular-devkit/build-angular:browser", @@ -73,7 +74,8 @@ "production": { "buildTarget": "dotcdn:build:production" } - } + }, + "continuous": true }, "extract-i18n": { "executor": "@angular-devkit/build-angular:extract-i18n", @@ -101,9 +103,9 @@ "stylePreprocessorOptions": { "includePaths": ["libs/dotcms-scss/angular"] }, - "scripts": ["node_modules/chart.js/dist/Chart.js"] + "scripts": ["node_modules/chart.js/dist/Chart.js"], + "tsConfig": "apps/dotcdn/tsconfig.spec.json" } } - }, - "tags": [] + } } diff --git a/core-web/apps/dotcdn/src/app/app.component.html b/core-web/apps/dotcdn/src/app/app.component.html index dd507f015b41..f6bcec693636 100644 --- a/core-web/apps/dotcdn/src/app/app.component.html +++ b/core-web/apps/dotcdn/src/app/app.component.html @@ -1,105 +1,110 @@ - -
-
@@ -14,5 +14,5 @@
{{ 'dot.template.builder.sidebar.header.title' | dm }} [width]="4" [items]="containers" [actions]="['add']" - [containerMap]="containerMap"> + [containerMap]="containerMap" />
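The template hunks above and below all apply one mechanical change: components with no projected content move from an explicit closing tag to the self-closing syntax Angular has supported since 15.1. A minimal sketch of the pattern (component names here are hypothetical, not from the patch):

```typescript
import { Component, input } from '@angular/core';

// Hypothetical leaf component with no projected content.
@Component({
    selector: 'dot-demo-badge',
    template: `{{ label() }}`
})
export class DotDemoBadgeComponent {
    label = input('');
}

// Hypothetical host showing the before/after of the same binding.
@Component({
    selector: 'dot-demo-host',
    imports: [DotDemoBadgeComponent],
    template: `
        <!-- Before: explicit closing tag -->
        <!-- <dot-demo-badge [label]="'draft'"></dot-demo-badge> -->

        <!-- After (Angular 15.1+): self-closing, identical behavior -->
        <dot-demo-badge [label]="'draft'" />
    `
})
export class DotDemoHostComponent {}
```

Behavior is unchanged; the self-closing form only drops the redundant closing tag.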
diff --git a/core-web/libs/template-builder/src/lib/components/template-builder/components/template-builder-theme-selector/template-builder-theme-selector.component.html b/core-web/libs/template-builder/src/lib/components/template-builder/components/template-builder-theme-selector/template-builder-theme-selector.component.html index e036860ded17..b99056114bda 100644 --- a/core-web/libs/template-builder/src/lib/components/template-builder/components/template-builder-theme-selector/template-builder-theme-selector.component.html +++ b/core-web/libs/template-builder/src/lib/components/template-builder/components/template-builder-theme-selector/template-builder-theme-selector.component.html @@ -8,7 +8,7 @@ #siteSelector dotSiteSelector optionLabel="name" - filterBy="name"> + filterBy="name" /> diff --git a/core-web/libs/template-builder/src/lib/components/template-builder/template-builder.component.html b/core-web/libs/template-builder/src/lib/components/template-builder/template-builder.component.html index c3c4b26b79d1..16642ac27b7d 100644 --- a/core-web/libs/template-builder/src/lib/components/template-builder/template-builder.component.html +++ b/core-web/libs/template-builder/src/lib/components/template-builder/template-builder.component.html @@ -1,7 +1,7 @@ @if (vm$ | async; as vm) {
- +
- + data-testId="template-builder-actions" /> +
- @for ( - box of row.subGridOpts?.children; - track identify(i, box); - let i = $index - ) { + @for (box of row.subGridOpts?.children; track identify($index, box)) {
+ class="grid-stack-item-content" />
}
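The `@for` change above tightens the track expression: instead of aliasing `$index` with `let i = $index` and passing `i`, it passes `$index` straight into `identify()`. Combined with the `identify()` rewrite shown later in this file's `.ts` diff, the tracking key becomes id plus position, which avoids duplicate-key errors (NG0955) when the same id repeats. A runnable sketch of the same idea, with hypothetical names:

```typescript
import { Component, signal } from '@angular/core';

interface Box {
    id?: string;
}

// Hypothetical component showing the id-plus-index tracking idea:
// the track expression must produce a unique, stable key per item,
// so duplicate or missing ids fall back to the item's position.
@Component({
    selector: 'dot-demo-grid',
    template: `
        @for (box of boxes(); track identify($index, box)) {
            <div>{{ box.id ?? 'no id' }}</div>
        }
    `
})
export class DotDemoGridComponent {
    boxes = signal<Box[]>([{ id: 'a' }, { id: 'a' }, {}]);

    identify(index: number, box: Box): string {
        const id = box?.id;

        // The same id appearing twice in the source data would
        // otherwise trigger NG0955 at render time.
        return id != null && id !== '' ? `${id}-${index}` : `item-${index}`;
    }
}
```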
diff --git a/core-web/libs/template-builder/src/lib/components/template-builder/template-builder.component.spec.ts b/core-web/libs/template-builder/src/lib/components/template-builder/template-builder.component.spec.ts index 3afbd1e0f7c8..83c816980211 100644 --- a/core-web/libs/template-builder/src/lib/components/template-builder/template-builder.component.spec.ts +++ b/core-web/libs/template-builder/src/lib/components/template-builder/template-builder.component.spec.ts @@ -1,19 +1,15 @@ import { expect, it } from '@jest/globals'; import { byTestId, createComponentFactory, Spectator } from '@ngneat/spectator'; -import { AsyncPipe, NgClass, NgFor, NgIf, NgStyle } from '@angular/common'; import { HttpClientTestingModule } from '@angular/common/http/testing'; import { By } from '@angular/platform-browser'; -import { DividerModule } from 'primeng/divider'; -import { DialogService, DynamicDialogModule, DynamicDialogRef } from 'primeng/dynamicdialog'; -import { ToolbarModule } from 'primeng/toolbar'; +import { DialogService, DynamicDialogRef } from 'primeng/dynamicdialog'; import { pluck, take } from 'rxjs/operators'; import { DotContainersService, DotEventsService, DotMessageService } from '@dotcms/data-access'; import { CoreWebService, LoginService, SiteService } from '@dotcms/dotcms-js'; -import { DotMessagePipe } from '@dotcms/ui'; import { containersMock, CoreWebServiceMock, @@ -22,7 +18,6 @@ import { SiteServiceMock } from '@dotcms/utils-testing'; -import { TemplateBuilderComponentsModule } from './components/template-builder-components.module'; import { DotGridStackWidget, SCROLL_DIRECTION } from './models/models'; import { DotTemplateBuilderStore } from './store/template-builder.store'; import { TemplateBuilderComponent } from './template-builder.component'; @@ -62,19 +57,7 @@ describe('TemplateBuilderComponent', () => { const createComponent = createComponentFactory({ component: TemplateBuilderComponent, - imports: [ - NgFor, - NgIf, - AsyncPipe, - DotMessagePipe, - DynamicDialogModule, - NgStyle, - NgClass, - ToolbarModule, - DividerModule, - TemplateBuilderComponentsModule, - HttpClientTestingModule - ], + imports: [HttpClientTestingModule], providers: [ DotTemplateBuilderStore, DialogService, diff --git a/core-web/libs/template-builder/src/lib/components/template-builder/template-builder.component.stories.ts b/core-web/libs/template-builder/src/lib/components/template-builder/template-builder.component.stories.ts index a89030da1d13..b7797a7fd424 100644 --- a/core-web/libs/template-builder/src/lib/components/template-builder/template-builder.component.stories.ts +++ b/core-web/libs/template-builder/src/lib/components/template-builder/template-builder.component.stories.ts @@ -1,16 +1,10 @@ import { Meta, moduleMetadata, StoryObj } from '@storybook/angular'; import { of } from 'rxjs'; -import { AsyncPipe, NgClass, NgFor, NgIf } from '@angular/common'; import { HttpClient, HttpClientModule } from '@angular/common/http'; -import { FormsModule } from '@angular/forms'; import { BrowserAnimationsModule } from '@angular/platform-browser/animations'; -import { ButtonModule } from 'primeng/button'; -import { DividerModule } from 'primeng/divider'; -import { DropdownModule } from 'primeng/dropdown'; -import { DialogService, DynamicDialogModule, DynamicDialogRef } from 'primeng/dynamicdialog'; -import { ToolbarModule } from 'primeng/toolbar'; +import { DialogService, DynamicDialogRef } from 'primeng/dynamicdialog'; import { DotContainersService, @@ -20,7 +14,6 @@ import { } from 
'@dotcms/data-access'; import { CoreWebService, SiteService } from '@dotcms/dotcms-js'; import { DotLayout } from '@dotcms/dotcms-models'; -import { DotMessagePipe } from '@dotcms/ui'; import { CoreWebServiceMock, DotContainersServiceMock, @@ -28,7 +21,6 @@ import { } from '@dotcms/utils-testing'; import { JsonClassesService } from './components/add-style-classes-dialog/services/json-classes.service'; -import { TemplateBuilderComponentsModule } from './components/template-builder-components.module'; import { DotTemplateBuilderStore } from './store/template-builder.store'; import { TemplateBuilderComponent } from './template-builder.component'; import { @@ -43,22 +35,7 @@ const meta: Meta = { component: TemplateBuilderComponent, decorators: [ moduleMetadata({ - imports: [ - NgFor, - NgIf, - AsyncPipe, - NgClass, - TemplateBuilderComponentsModule, - DotMessagePipe, - BrowserAnimationsModule, - DynamicDialogModule, - HttpClientModule, - ButtonModule, - ToolbarModule, - DividerModule, - DropdownModule, - FormsModule - ], + imports: [BrowserAnimationsModule, HttpClientModule], providers: [ DotTemplateBuilderStore, DialogService, diff --git a/core-web/libs/template-builder/src/lib/components/template-builder/template-builder.component.ts b/core-web/libs/template-builder/src/lib/components/template-builder/template-builder.component.ts index 93b72890e156..ef66bb39fbff 100644 --- a/core-web/libs/template-builder/src/lib/components/template-builder/template-builder.component.ts +++ b/core-web/libs/template-builder/src/lib/components/template-builder/template-builder.component.ts @@ -9,6 +9,7 @@ import { import { DDElementHost } from 'gridstack/dist/dd-element'; import { Observable, Subject, combineLatest } from 'rxjs'; +import { AsyncPipe, NgClass, NgStyle } from '@angular/common'; import { ChangeDetectionStrategy, ChangeDetectorRef, @@ -28,7 +29,9 @@ import { inject } from '@angular/core'; -import { DialogService, DynamicDialogRef } from 'primeng/dynamicdialog'; +import { DividerModule } from 'primeng/divider'; +import { DialogService, DynamicDialogModule, DynamicDialogRef } from 'primeng/dynamicdialog'; +import { ToolbarModule } from 'primeng/toolbar'; import { filter, take, map, takeUntil, skip } from 'rxjs/operators'; @@ -42,11 +45,16 @@ import { DotContainerMap, DotTemplate } from '@dotcms/dotcms-models'; +import { DotMessagePipe } from '@dotcms/ui'; import { colIcon, rowIcon } from './assets/icons'; import { AddStyleClassesDialogComponent } from './components/add-style-classes-dialog/add-style-classes-dialog.component'; import { AddWidgetComponent } from './components/add-widget/add-widget.component'; +import { TemplateBuilderActionsComponent } from './components/template-builder-actions/template-builder-actions.component'; +import { TemplateBuilderBoxComponent } from './components/template-builder-box/template-builder-box.component'; import { TemplateBuilderRowComponent } from './components/template-builder-row/template-builder-row.component'; +import { TemplateBuilderSectionComponent } from './components/template-builder-section/template-builder-section.component'; +import { TemplateBuilderSidebarComponent } from './components/template-builder-sidebar/template-builder-sidebar.component'; import { TemplateBuilderThemeSelectorComponent } from './components/template-builder-theme-selector/template-builder-theme-selector.component'; import { BOX_WIDTH, @@ -76,7 +84,21 @@ import { styleUrls: ['./template-builder.component.scss'], changeDetection: ChangeDetectionStrategy.OnPush, providers: 
[DotTemplateBuilderStore], - standalone: false + imports: [ + AsyncPipe, + NgClass, + NgStyle, + DotMessagePipe, + DynamicDialogModule, + ToolbarModule, + DividerModule, + AddWidgetComponent, + TemplateBuilderActionsComponent, + TemplateBuilderSectionComponent, + TemplateBuilderSidebarComponent, + TemplateBuilderRowComponent, + TemplateBuilderBoxComponent + ] }) export class TemplateBuilderComponent implements OnDestroy, OnChanges, OnInit { private store = inject(DotTemplateBuilderStore); @@ -355,13 +377,21 @@ export class TemplateBuilderComponent implements OnDestroy, OnChanges, OnInit { /** * @description This method is used to identify items by id * - * @param {number} _ + * @param {number} index * @param {GridStackWidget} w * @return {*} * @memberof TemplateBuilderComponent */ - identify(_: number, w: GridStackWidget): string { - return w.id as string; + identify(index: number, w: GridStackWidget): string { + // Ensure we always return a unique string + // Combine ID with index to prevent Angular 20 NG0955 errors about duplicate keys + // This handles cases where the same ID might appear in different rows + const id = w?.id; + if (id != null && id !== '') { + return `${String(id)}-${index}`; + } + // Fallback to index if ID is not available + return `item-${index}`; } /** @@ -378,7 +408,11 @@ ): void { // The gridstack model is polluted with the subgrid data - So we need to delete the node from the GridStack Model - this.grid.engine.nodes.find((node) => node.id === rowID).subGrid?.removeWidget(element); + if (this.grid?.engine) { + this.grid.engine.nodes + .find((node) => node.id === rowID) + ?.subGrid?.removeWidget(element); + } this.store.removeColumn({ ...column, parentId: rowID as string }); } diff --git a/core-web/libs/template-builder/src/lib/template-builder.module.ts b/core-web/libs/template-builder/src/lib/template-builder.module.ts deleted file mode 100644 index 8fc67f8821ca..000000000000 --- a/core-web/libs/template-builder/src/lib/template-builder.module.ts +++ /dev/null @@ -1,31 +0,0 @@ -import { AsyncPipe, NgClass, NgFor, NgIf, NgStyle } from '@angular/common'; -import { NgModule } from '@angular/core'; - -import { DividerModule } from 'primeng/divider'; -import { DialogService, DynamicDialogModule, DynamicDialogRef } from 'primeng/dynamicdialog'; -import { ToolbarModule } from 'primeng/toolbar'; - -import { DotMessagePipe } from '@dotcms/ui'; - -import { DotLayoutPropertiesComponent } from './components/template-builder/components/dot-layout-properties/dot-layout-properties.component'; -import { TemplateBuilderComponentsModule } from './components/template-builder/components/template-builder-components.module'; -import { TemplateBuilderComponent } from './components/template-builder/template-builder.component'; - -@NgModule({ - imports: [ - NgIf, - NgFor, - AsyncPipe, - DotMessagePipe, - DynamicDialogModule, - NgStyle, - NgClass, - ToolbarModule, - DividerModule, - TemplateBuilderComponentsModule - ], - declarations: [TemplateBuilderComponent], - providers: [DialogService, DynamicDialogRef], - exports: [TemplateBuilderComponent, DotLayoutPropertiesComponent] -}) -export class TemplateBuilderModule {} diff --git a/core-web/libs/ui/project.json b/core-web/libs/ui/project.json index 761238c26bc2..c8dcfd5e1c9b 100644 --- a/core-web/libs/ui/project.json +++ b/core-web/libs/ui/project.json @@ -4,6 +4,7 @@ "projectType": "library", "sourceRoot": "libs/ui/src", "prefix": "dotcms", + "tags": [],
"targets": { "lint": { "executor": "@nx/eslint:lint", @@ -14,7 +15,8 @@ "outputs": ["{workspaceRoot}/coverage/{projectRoot}"], "options": { "jestConfig": "libs/ui/jest.config.ts", - "passWithNoTests": true + "passWithNoTests": true, + "tsConfig": "libs/ui/tsconfig.spec.json" }, "configurations": { "ci": { @@ -23,6 +25,5 @@ } } } - }, - "tags": [] + } } diff --git a/core-web/libs/ui/src/index.ts b/core-web/libs/ui/src/index.ts index e423b3aa72b8..a77a9adb7717 100644 --- a/core-web/libs/ui/src/index.ts +++ b/core-web/libs/ui/src/index.ts @@ -1,7 +1,7 @@ -// Modules -export * from './lib/dot-icon/dot-icon.module'; -export * from './lib/dot-spinner/dot-spinner.module'; -export * from './lib/modules/dot-dialog/dot-dialog.module'; +// Components (now standalone) +export * from './lib/dot-icon/dot-icon.component'; +export * from './lib/dot-spinner/dot-spinner.component'; +export * from './lib/modules/dot-dialog/dot-dialog.component'; // Components export * from './lib/components/add-to-bundle/dot-add-to-bundle.component'; export * from './lib/components/dot-action-menu-button/dot-action-menu-button.component'; @@ -55,14 +55,15 @@ export * from './lib/dot-contentlet-status/dot-contentlet-status.pipe'; export * from './lib/dot-message/dot-message.pipe'; export * from './lib/pipes/dot-diff/dot-diff.pipe'; export * from './lib/pipes/dot-file-size-format/dot-file-size-format.pipe'; +export * from './lib/pipes/dot-folder-name/dot-folder-name.pipe'; export * from './lib/pipes/dot-highlight/dot-highlight.pipe'; export * from './lib/pipes/dot-iso-code/dot-iso-code.pipe'; +export * from './lib/pipes/dot-locale-tag/dot-locale-tag.pipe'; export * from './lib/pipes/dot-relative-date/dot-relative-date.pipe'; export * from './lib/pipes/dot-safe-html/dot-safe-html.pipe'; export * from './lib/pipes/dot-string-format/dot-string-format.pipe'; export * from './lib/pipes/dot-timestamp-to-date/dot-timestamp-to-date.pipe'; export * from './lib/pipes/safe-url/safe-url.pipe'; -export * from './lib/pipes/dot-folder-name/dot-folder-name.pipe'; // Resolvers export * from './lib/resolvers/dot-analytics-health-check.resolver.service'; export * from './lib/resolvers/dot-enterprise-license-resolver.service'; diff --git a/core-web/libs/ui/src/lib/components/add-to-bundle/dot-add-to-bundle.component.html b/core-web/libs/ui/src/lib/components/add-to-bundle/dot-add-to-bundle.component.html index e97c3786873a..b573b874fc36 100644 --- a/core-web/libs/ui/src/lib/components/add-to-bundle/dot-add-to-bundle.component.html +++ b/core-web/libs/ui/src/lib/components/add-to-bundle/dot-add-to-bundle.component.html @@ -18,12 +18,10 @@ appendTo="body" editable="true" formControlName="addBundle" - optionLabel="name"> + optionLabel="name" /> + message="{{ 'contenttypes.content.add_to_bundle.errormsg' | dm }}" />
diff --git a/core-web/libs/ui/src/lib/components/add-to-bundle/dot-add-to-bundle.component.ts b/core-web/libs/ui/src/lib/components/add-to-bundle/dot-add-to-bundle.component.ts index 9e6f4030a0d6..ee72a42e8473 100644 --- a/core-web/libs/ui/src/lib/components/add-to-bundle/dot-add-to-bundle.component.ts +++ b/core-web/libs/ui/src/lib/components/add-to-bundle/dot-add-to-bundle.component.ts @@ -28,7 +28,7 @@ import { LoggerService } from '@dotcms/dotcms-js'; import { DotAjaxActionResponseView, DotBundle, DotDialogActions } from '@dotcms/dotcms-models'; import { DotMessagePipe } from '../../dot-message/dot-message.pipe'; -import { DotDialogModule } from '../../modules/dot-dialog/dot-dialog.module'; +import { DotDialogComponent } from '../../modules/dot-dialog/dot-dialog.component'; import { DotFieldValidationMessageComponent } from '../dot-field-validation-message/dot-field-validation-message.component'; const LAST_BUNDLE_USED = 'lastSelectedBundle'; @@ -37,7 +37,7 @@ const LAST_BUNDLE_USED = 'lastSelectedBundle'; selector: 'dot-add-to-bundle', templateUrl: 'dot-add-to-bundle.component.html', imports: [ - DotDialogModule, + DotDialogComponent, DotMessagePipe, ReactiveFormsModule, DropdownModule, diff --git a/core-web/libs/ui/src/lib/components/dot-action-menu-button/dot-action-menu-button.component.html b/core-web/libs/ui/src/lib/components/dot-action-menu-button/dot-action-menu-button.component.html index 68cd675d8a5a..70f00b837dfb 100644 --- a/core-web/libs/ui/src/lib/components/dot-action-menu-button/dot-action-menu-button.component.html +++ b/core-web/libs/ui/src/lib/components/dot-action-menu-button/dot-action-menu-button.component.html @@ -8,7 +8,7 @@ [icon]="filteredActions[0].icon" [pTooltip]="filteredActions[0].label" styleClass="p-button-sm p-button-rounded p-button-text" - data-testid="dot-action-tooltip-button"> + data-testid="dot-action-tooltip-button" /> } @else { } diff --git a/core-web/libs/ui/src/lib/components/dot-action-menu-button/dot-action-menu-button.component.ts b/core-web/libs/ui/src/lib/components/dot-action-menu-button/dot-action-menu-button.component.ts index f8fb2f933cb4..606ac099fd5d 100644 --- a/core-web/libs/ui/src/lib/components/dot-action-menu-button/dot-action-menu-button.component.ts +++ b/core-web/libs/ui/src/lib/components/dot-action-menu-button/dot-action-menu-button.component.ts @@ -47,7 +47,7 @@ export class DotActionMenuButtonComponent implements OnInit { command: ($event: DotActionMenuClickEvent) => { action.menuItem.command(this.item); - $event.originalEvent.stopPropagation(); + $event?.originalEvent?.stopPropagation(); } }; }); diff --git a/core-web/libs/ui/src/lib/components/dot-ai-image-prompt/ai-image-prompt.store.ts b/core-web/libs/ui/src/lib/components/dot-ai-image-prompt/ai-image-prompt.store.ts index 45886b0be778..402830023561 100644 --- a/core-web/libs/ui/src/lib/components/dot-ai-image-prompt/ai-image-prompt.store.ts +++ b/core-web/libs/ui/src/lib/components/dot-ai-image-prompt/ai-image-prompt.store.ts @@ -148,8 +148,8 @@ export class DotAiImagePromptStore extends ComponentStore { + tapResponse({ + next: (response) => { this.updateImageState( response, formValue, @@ -158,7 +158,7 @@ export class DotAiImagePromptStore extends ComponentStore { + error: (error: string) => { this.updateImageState( null, formValue, @@ -167,10 +167,9 @@ export class DotAiImagePromptStore extends ComponentStore { + tapResponse({ + next: (response) => { const newImage: DotGeneratedAIImage = { request: formValue, response: response, error: null }; - if 
(isImageWithError) { imagesArray[galleryActiveIndex] = newImage; } else { imagesArray.push(newImage); } - patchState(store, { status: ComponentStatus.IDLE, images: imagesArray, @@ -129,13 +127,13 @@ export const DotAiImagePromptStore = signalStore( : imagesArray.length - 1 }); }, - (error: string) => { + error: (error: string) => { patchState(store, { status: ComponentStatus.ERROR, error: error }); } - ) + }) ); }) ) diff --git a/core-web/libs/ui/src/lib/components/dot-api-link/dot-api-link.component.html b/core-web/libs/ui/src/lib/components/dot-api-link/dot-api-link.component.html index ef3d986f0756..9279c4433ef4 100644 --- a/core-web/libs/ui/src/lib/components/dot-api-link/dot-api-link.component.html +++ b/core-web/libs/ui/src/lib/components/dot-api-link/dot-api-link.component.html @@ -1 +1 @@ - + diff --git a/core-web/libs/ui/src/lib/components/dot-asset-search/components/dot-asset-card-list/dot-asset-card-list.component.html b/core-web/libs/ui/src/lib/components/dot-asset-search/components/dot-asset-card-list/dot-asset-card-list.component.html index b8e97ff43bfe..f36b2a5f6380 100644 --- a/core-web/libs/ui/src/lib/components/dot-asset-search/components/dot-asset-card-list/dot-asset-card-list.component.html +++ b/core-web/libs/ui/src/lib/components/dot-asset-search/components/dot-asset-card-list/dot-asset-card-list.component.html @@ -10,26 +10,26 @@ @if (contentlet[0]) { + [contentlet]="contentlet[0]" /> } @if (contentlet[1]) { + [contentlet]="contentlet[1]" /> }
} @else { - + }
@for (loadingItem of loadingItems; track loadingItem) {
- - + +
}
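The store hunks above (the ai-image-prompt stores) and dot-asset-search.store.ts further down migrate `tapResponse` from the deprecated positional `(next, error)` callbacks to the object signature that newer NgRx versions expect. A minimal sketch of the migrated shape; it assumes `tapResponse` is imported from `@ngrx/operators`, since the hunks don't show the import line, and uses a hypothetical store and data source:

```typescript
import { Injectable } from '@angular/core';
import { ComponentStore } from '@ngrx/component-store';
import { tapResponse } from '@ngrx/operators';
import { Observable, of } from 'rxjs';
import { switchMap } from 'rxjs/operators';

interface DemoState {
    items: string[];
    error: string | null;
}

@Injectable()
export class DemoStore extends ComponentStore<DemoState> {
    constructor() {
        super({ items: [], error: null });
    }

    // Hypothetical data source standing in for a real HTTP service.
    private fetchItems(): Observable<string[]> {
        return of(['a', 'b']);
    }

    readonly load = this.effect<void>((trigger$) =>
        trigger$.pipe(
            switchMap(() =>
                this.fetchItems().pipe(
                    // Object signature: the positional (next, error)
                    // overload is deprecated in recent NgRx releases.
                    tapResponse({
                        next: (items) => this.patchState({ items }),
                        error: (error: string) => this.patchState({ error })
                    })
                )
            )
        )
    );
}
```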
diff --git a/core-web/libs/ui/src/lib/components/dot-asset-search/components/dot-asset-card-list/dot-asset-card-list.component.ts b/core-web/libs/ui/src/lib/components/dot-asset-search/components/dot-asset-card-list/dot-asset-card-list.component.ts index 62aa1dc41b55..d12f69c8f326 100644 --- a/core-web/libs/ui/src/lib/components/dot-asset-search/components/dot-asset-card-list/dot-asset-card-list.component.ts +++ b/core-web/libs/ui/src/lib/components/dot-asset-search/components/dot-asset-card-list/dot-asset-card-list.component.ts @@ -1,3 +1,4 @@ +import { NgTemplateOutlet } from '@angular/common'; import { ChangeDetectionStrategy, Component, @@ -24,7 +25,12 @@ const squarePlus = selector: 'dot-asset-card-list', templateUrl: './dot-asset-card-list.component.html', styleUrls: ['./dot-asset-card-list.component.scss'], - imports: [ScrollerModule, DotAssetCardComponent, DotAssetCardSkeletonComponent], + imports: [ + ScrollerModule, + DotAssetCardComponent, + DotAssetCardSkeletonComponent, + NgTemplateOutlet + ], changeDetection: ChangeDetectionStrategy.OnPush }) export class DotAssetCardListComponent implements OnChanges { diff --git a/core-web/libs/ui/src/lib/components/dot-asset-search/components/dot-asset-card-skeleton/dot-asset-card-skeleton.component.html b/core-web/libs/ui/src/lib/components/dot-asset-search/components/dot-asset-card-skeleton/dot-asset-card-skeleton.component.html index 38ed289b7b79..724591ee8b73 100644 --- a/core-web/libs/ui/src/lib/components/dot-asset-search/components/dot-asset-card-skeleton/dot-asset-card-skeleton.component.html +++ b/core-web/libs/ui/src/lib/components/dot-asset-search/components/dot-asset-card-skeleton/dot-asset-card-skeleton.component.html @@ -1,12 +1,12 @@ - + - +
- - + +
diff --git a/core-web/libs/ui/src/lib/components/dot-asset-search/components/dot-asset-card/dot-asset-card.component.html b/core-web/libs/ui/src/lib/components/dot-asset-search/components/dot-asset-card/dot-asset-card.component.html index 17561ba86abf..cbba385efde0 100644 --- a/core-web/libs/ui/src/lib/components/dot-asset-search/components/dot-asset-card/dot-asset-card.component.html +++ b/core-web/libs/ui/src/lib/components/dot-asset-search/components/dot-asset-card/dot-asset-card.component.html @@ -5,7 +5,7 @@ [contentlet]="contentlet" [iconSize]="'72px'" [showVideoThumbnail]="true" - data-testId="dot-contentlet-thumbnail"> + data-testId="dot-contentlet-thumbnail" />

@@ -14,7 +14,7 @@

{{ contentlet?.language }} - +
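A second recurring pattern in these ui-lib hunks: explicit `standalone: true` flags are deleted from directives and pipes, the remaining `standalone: false` components are converted, and NgModule wrappers are removed outright (template-builder.module.ts above; DotIconModule and DotSpinnerModule below), because standalone is the default from Angular 19 on. A minimal sketch of what consumers do instead (hypothetical names):

```typescript
import { Component } from '@angular/core';

// Hypothetical leaf component: no `standalone: true` flag is needed on
// Angular 19+, and no NgModule wrapper has to declare or export it.
@Component({
    selector: 'dot-demo-icon',
    template: `<i class="pi pi-check"></i>`
})
export class DotDemoIconComponent {}

// Consumers import the component class directly where they
// previously imported the wrapper module.
@Component({
    selector: 'dot-demo-page',
    imports: [DotDemoIconComponent], // was: imports: [DotDemoIconModule]
    template: `<dot-demo-icon />`
})
export class DotDemoPageComponent {}
```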
diff --git a/core-web/libs/ui/src/lib/components/dot-asset-search/components/dot-asset-search-dialog/dot-asset-search-dialog.component.html b/core-web/libs/ui/src/lib/components/dot-asset-search/components/dot-asset-search-dialog/dot-asset-search-dialog.component.html index cff0bebabcee..e5c8391aca84 100644 --- a/core-web/libs/ui/src/lib/components/dot-asset-search/components/dot-asset-search-dialog/dot-asset-search-dialog.component.html +++ b/core-web/libs/ui/src/lib/components/dot-asset-search/components/dot-asset-search-dialog/dot-asset-search-dialog.component.html @@ -1 +1 @@ - + diff --git a/core-web/libs/ui/src/lib/components/dot-asset-search/dot-asset-search.component.html b/core-web/libs/ui/src/lib/components/dot-asset-search/dot-asset-search.component.html index 41a5f479f1fa..cd48058a6398 100644 --- a/core-web/libs/ui/src/lib/components/dot-asset-search/dot-asset-search.component.html +++ b/core-web/libs/ui/src/lib/components/dot-asset-search/dot-asset-search.component.html @@ -19,5 +19,5 @@ (nextBatch)="offset$.next($event)" [contentlets]="vm.contentlets" [done]="vm.preventScroll" - [loading]="vm.loading"> + [loading]="vm.loading" /> } diff --git a/core-web/libs/ui/src/lib/components/dot-asset-search/dot-asset-search.component.ts b/core-web/libs/ui/src/lib/components/dot-asset-search/dot-asset-search.component.ts index 79a2df358f7f..37e6abd60caf 100644 --- a/core-web/libs/ui/src/lib/components/dot-asset-search/dot-asset-search.component.ts +++ b/core-web/libs/ui/src/lib/components/dot-asset-search/dot-asset-search.component.ts @@ -20,7 +20,6 @@ import { InputTextModule } from 'primeng/inputtext'; import { debounceTime, skip, throttleTime } from 'rxjs/operators'; -import { DotContentSearchService, DotLanguagesService } from '@dotcms/data-access'; import { DotCMSContentlet, EditorAssetTypes } from '@dotcms/dotcms-models'; // services @@ -32,7 +31,7 @@ import { DotAssetSearchStore } from './store/dot-asset-search.store'; selector: 'dot-asset-search', templateUrl: './dot-asset-search.component.html', styleUrls: ['./dot-asset-search.component.scss'], - providers: [DotAssetSearchStore, DotContentSearchService, DotLanguagesService], + providers: [DotAssetSearchStore], imports: [DotAssetCardListComponent, DotAssetCardListComponent, InputTextModule, CommonModule], changeDetection: ChangeDetectionStrategy.OnPush }) diff --git a/core-web/libs/ui/src/lib/components/dot-asset-search/store/dot-asset-search.store.ts b/core-web/libs/ui/src/lib/components/dot-asset-search/store/dot-asset-search.store.ts index f54c7979b42e..79567ff905d4 100644 --- a/core-web/libs/ui/src/lib/components/dot-asset-search/store/dot-asset-search.store.ts +++ b/core-web/libs/ui/src/lib/components/dot-asset-search/store/dot-asset-search.store.ts @@ -82,12 +82,12 @@ export class DotAssetSearchStore extends ComponentStore { tap(() => this.updateLoading(true)), switchMap((params) => { return this.searchContentletsRequest(params).pipe( - tapResponse( - (contentlets) => this.updateContentlets(contentlets), - (_error) => { + tapResponse({ + next: (contentlets) => this.updateContentlets(contentlets), + error: (_error) => { /* */ } - ) + }) ); }) ); @@ -102,12 +102,12 @@ export class DotAssetSearchStore extends ComponentStore { return params$.pipe( switchMap((params) => this.searchContentletsRequest(params).pipe( - tapResponse( - (contentlets) => this.mergeContentlets(contentlets), - (_error) => { + tapResponse({ + next: (contentlets) => this.mergeContentlets(contentlets), + error: (_error) => { /* */ } - ) + }) ) ) ); diff 
--git a/core-web/libs/ui/src/lib/components/dot-binary-option-selector/dot-binary-option-selector.component.spec.ts b/core-web/libs/ui/src/lib/components/dot-binary-option-selector/dot-binary-option-selector.component.spec.ts index 02b34870fd41..df43d796d987 100644 --- a/core-web/libs/ui/src/lib/components/dot-binary-option-selector/dot-binary-option-selector.component.spec.ts +++ b/core-web/libs/ui/src/lib/components/dot-binary-option-selector/dot-binary-option-selector.component.spec.ts @@ -102,7 +102,8 @@ describe('DotBinaryOptionSelectorComponent', () => { name: 'options' }); - expect(radio.attributes['ng-reflect-value']).toEqual(DATA_MOCK.option1.value); + // Verify the component has the correct value for the binding + expect(component.firstOption.value).toEqual(DATA_MOCK.option1.value); }); it('should have icon', () => { @@ -152,7 +153,8 @@ describe('DotBinaryOptionSelectorComponent', () => { name: 'options' }); - expect(radio.attributes['ng-reflect-value']).toEqual(DATA_MOCK.option2.value); + // Verify the component has the correct value for the binding + expect(component.secondOption.value).toEqual(DATA_MOCK.option2.value); }); it('should have icon', () => { diff --git a/core-web/libs/ui/src/lib/components/dot-drop-zone/dot-drop-zone.component.html b/core-web/libs/ui/src/lib/components/dot-drop-zone/dot-drop-zone.component.html index 6dbc74306383..40b372640339 100644 --- a/core-web/libs/ui/src/lib/components/dot-drop-zone/dot-drop-zone.component.html +++ b/core-web/libs/ui/src/lib/components/dot-drop-zone/dot-drop-zone.component.html @@ -1 +1 @@ - + diff --git a/core-web/libs/ui/src/lib/components/dot-form-dialog/dot-form-dialog.component.html b/core-web/libs/ui/src/lib/components/dot-form-dialog/dot-form-dialog.component.html index a8fee188155f..6fe61b1dc75d 100644 --- a/core-web/libs/ui/src/lib/components/dot-form-dialog/dot-form-dialog.component.html +++ b/core-web/libs/ui/src/lib/components/dot-form-dialog/dot-form-dialog.component.html @@ -1,9 +1,9 @@
- +
- +
- +
- +
} diff --git a/core-web/libs/ui/src/lib/components/dot-sidebar-header/dot-sidebar-header.component.html b/core-web/libs/ui/src/lib/components/dot-sidebar-header/dot-sidebar-header.component.html index d6e2b463cb33..d0172c10059c 100644 --- a/core-web/libs/ui/src/lib/components/dot-sidebar-header/dot-sidebar-header.component.html +++ b/core-web/libs/ui/src/lib/components/dot-sidebar-header/dot-sidebar-header.component.html @@ -12,7 +12,7 @@

{{ dotTitle }}

@if (actionButtonTpl) {
- +
} diff --git a/core-web/libs/ui/src/lib/components/dot-workflow-actions/dot-workflow-actions.component.ts b/core-web/libs/ui/src/lib/components/dot-workflow-actions/dot-workflow-actions.component.ts index 9e4f256da805..5735dcbb65d8 100644 --- a/core-web/libs/ui/src/lib/components/dot-workflow-actions/dot-workflow-actions.component.ts +++ b/core-web/libs/ui/src/lib/components/dot-workflow-actions/dot-workflow-actions.component.ts @@ -1,4 +1,3 @@ -import { CommonModule } from '@angular/common'; import { ChangeDetectionStrategy, Component, @@ -31,7 +30,7 @@ interface WorkflowActionsGroup { @Component({ selector: 'dot-workflow-actions', - imports: [CommonModule, ButtonModule, SplitButtonModule, DotMessagePipe], + imports: [ButtonModule, SplitButtonModule, DotMessagePipe], templateUrl: './dot-workflow-actions.component.html', styleUrl: './dot-workflow-actions.component.scss', changeDetection: ChangeDetectionStrategy.OnPush diff --git a/core-web/libs/ui/src/lib/directives/dot-autofocus/dot-autofocus.directive.ts b/core-web/libs/ui/src/lib/directives/dot-autofocus/dot-autofocus.directive.ts index ef55ac67f378..c9c67498d75e 100644 --- a/core-web/libs/ui/src/lib/directives/dot-autofocus/dot-autofocus.directive.ts +++ b/core-web/libs/ui/src/lib/directives/dot-autofocus/dot-autofocus.directive.ts @@ -1,8 +1,7 @@ import { Directive, ElementRef, OnInit, inject } from '@angular/core'; @Directive({ - selector: '[dotAutofocus]', - standalone: true + selector: '[dotAutofocus]' }) export class DotAutofocusDirective implements OnInit { private el = inject(ElementRef); diff --git a/core-web/libs/ui/src/lib/directives/dot-avatar/dot-avatar.directive.ts b/core-web/libs/ui/src/lib/directives/dot-avatar/dot-avatar.directive.ts index 2fbb0286dd44..96a9c1e4f00d 100644 --- a/core-web/libs/ui/src/lib/directives/dot-avatar/dot-avatar.directive.ts +++ b/core-web/libs/ui/src/lib/directives/dot-avatar/dot-avatar.directive.ts @@ -3,8 +3,7 @@ import { ChangeDetectorRef, Directive, HostListener, Input, OnInit, inject } fro import { Avatar } from 'primeng/avatar'; @Directive({ - selector: 'p-avatar[dotAvatar]', - standalone: true + selector: 'p-avatar[dotAvatar]' }) export class DotAvatarDirective implements OnInit { private avatar = inject(Avatar); diff --git a/core-web/libs/ui/src/lib/directives/dot-dropdown.directive.ts b/core-web/libs/ui/src/lib/directives/dot-dropdown.directive.ts index 17fe303a43e0..115d189cdf60 100644 --- a/core-web/libs/ui/src/lib/directives/dot-dropdown.directive.ts +++ b/core-web/libs/ui/src/lib/directives/dot-dropdown.directive.ts @@ -18,7 +18,6 @@ const DEFAULT_VALUE_NAME_INDEX = 'value'; * @class DotDropdownDirective */ @Directive({ - standalone: true, selector: '[dotDropdown]', providers: [DotMessagePipe] }) diff --git a/core-web/libs/ui/src/lib/directives/dot-dynamic.directive.ts b/core-web/libs/ui/src/lib/directives/dot-dynamic.directive.ts index dbff31eb7bf0..a684824bbc67 100644 --- a/core-web/libs/ui/src/lib/directives/dot-dynamic.directive.ts +++ b/core-web/libs/ui/src/lib/directives/dot-dynamic.directive.ts @@ -1,7 +1,6 @@ import { Directive, ViewContainerRef, inject } from '@angular/core'; @Directive({ - standalone: true, selector: '[dotDynamic]' }) export class DotDynamicDirective { diff --git a/core-web/libs/ui/src/lib/directives/dot-gravatar/dot-gravatar.directive.ts b/core-web/libs/ui/src/lib/directives/dot-gravatar/dot-gravatar.directive.ts index 416cb834006a..54c698fa162a 100644 --- a/core-web/libs/ui/src/lib/directives/dot-gravatar/dot-gravatar.directive.ts +++ 
b/core-web/libs/ui/src/lib/directives/dot-gravatar/dot-gravatar.directive.ts @@ -16,8 +16,7 @@ const FALLBACK_AVATAR_LETTER = 'A'; * If no email is provided, displays a default character. */ @Directive({ - selector: 'p-avatar[dotGravatar]', - standalone: true + selector: 'p-avatar[dotGravatar]' }) export class DotGravatarDirective { /** Reference to the PrimeNG Avatar component instance. */ diff --git a/core-web/libs/ui/src/lib/directives/dot-sidebar.directive.ts b/core-web/libs/ui/src/lib/directives/dot-sidebar.directive.ts index 3396fd6a2fb1..52c1aa5b801f 100644 --- a/core-web/libs/ui/src/lib/directives/dot-sidebar.directive.ts +++ b/core-web/libs/ui/src/lib/directives/dot-sidebar.directive.ts @@ -17,7 +17,6 @@ export enum SIDEBAR_SIZES { * */ @Directive({ - standalone: true, selector: '[dotSidebar]' }) export class DotSidebarDirective { diff --git a/core-web/libs/ui/src/lib/directives/dot-state-restore/dot-state-restore.directive.ts b/core-web/libs/ui/src/lib/directives/dot-state-restore/dot-state-restore.directive.ts index ed469c35d9e3..573cdf89a2fd 100644 --- a/core-web/libs/ui/src/lib/directives/dot-state-restore/dot-state-restore.directive.ts +++ b/core-web/libs/ui/src/lib/directives/dot-state-restore/dot-state-restore.directive.ts @@ -15,8 +15,7 @@ const enum StorageType { } @Directive({ - selector: '[dotStateRestore]', - standalone: true + selector: '[dotStateRestore]' }) export class DotStateRestoreDirective implements AfterViewInit { readonly #table = inject(Table); diff --git a/core-web/libs/ui/src/lib/directives/dot-string-template-outlet.directive.ts b/core-web/libs/ui/src/lib/directives/dot-string-template-outlet.directive.ts index 58417e00273e..125874e8cfbd 100644 --- a/core-web/libs/ui/src/lib/directives/dot-string-template-outlet.directive.ts +++ b/core-web/libs/ui/src/lib/directives/dot-string-template-outlet.directive.ts @@ -31,7 +31,6 @@ class DotStringTemplateOutletContext { **/ @Directive({ - standalone: true, selector: '[dotStringTemplateOutlet]' }) export class DotStringTemplateOutletDirective implements OnChanges { diff --git a/core-web/libs/ui/src/lib/directives/dot-trim-input/dot-trim-input.directive.ts b/core-web/libs/ui/src/lib/directives/dot-trim-input/dot-trim-input.directive.ts index 22a9aaada27f..b0dff5b065c9 100644 --- a/core-web/libs/ui/src/lib/directives/dot-trim-input/dot-trim-input.directive.ts +++ b/core-web/libs/ui/src/lib/directives/dot-trim-input/dot-trim-input.directive.ts @@ -5,8 +5,7 @@ import { NgControl } from '@angular/forms'; * Directive for trimming the input value on blur. 
*/ @Directive({ - selector: '[dotTrimInput]', - standalone: true + selector: '[dotTrimInput]' }) export class DotTrimInputDirective implements AfterViewInit { private readonly ngControl = inject(NgControl, { optional: true, self: true }); diff --git a/core-web/libs/ui/src/lib/dot-container-options/dot-container-options.directive.spec.ts b/core-web/libs/ui/src/lib/dot-container-options/dot-container-options.directive.spec.ts index ac17eeb985da..1a64bd528538 100644 --- a/core-web/libs/ui/src/lib/dot-container-options/dot-container-options.directive.spec.ts +++ b/core-web/libs/ui/src/lib/dot-container-options/dot-container-options.directive.spec.ts @@ -20,8 +20,7 @@ import { DotContainerOptionsDirective } from './dot-container-options.directive' imports: [DropdownModule, DotContainerOptionsDirective], template: ` - `, - standalone: true + ` }) class MockContainersDropdownComponent {} diff --git a/core-web/libs/ui/src/lib/dot-container-options/dot-container-options.directive.ts b/core-web/libs/ui/src/lib/dot-container-options/dot-container-options.directive.ts index 340e3ec0db8b..07663da128b1 100644 --- a/core-web/libs/ui/src/lib/dot-container-options/dot-container-options.directive.ts +++ b/core-web/libs/ui/src/lib/dot-container-options/dot-container-options.directive.ts @@ -24,8 +24,7 @@ const DEFAULT_VALUE_NAME_INDEX = 'value'; * @class DotContainerOptionsDirective */ @Directive({ - selector: 'p-dropdown[dotContainerOptions]', - standalone: true + selector: 'p-dropdown[dotContainerOptions]' }) export class DotContainerOptionsDirective implements OnInit { private readonly primeDropdown = inject(Dropdown, { optional: true, self: true }); diff --git a/core-web/libs/ui/src/lib/dot-contentlet-status/dot-contentlet-status.pipe.ts b/core-web/libs/ui/src/lib/dot-contentlet-status/dot-contentlet-status.pipe.ts index d87cf8da9dc0..7868947f720b 100644 --- a/core-web/libs/ui/src/lib/dot-contentlet-status/dot-contentlet-status.pipe.ts +++ b/core-web/libs/ui/src/lib/dot-contentlet-status/dot-contentlet-status.pipe.ts @@ -4,7 +4,6 @@ import { DotCMSContentlet } from '@dotcms/dotcms-models'; @Pipe({ name: 'dotContentletStatus', - standalone: true, pure: true }) export class DotContentletStatusPipe implements PipeTransform { diff --git a/core-web/libs/ui/src/lib/dot-field-required/dot-field-required.directive.ts b/core-web/libs/ui/src/lib/dot-field-required/dot-field-required.directive.ts index 86a510f88cb7..33a47f189cd7 100644 --- a/core-web/libs/ui/src/lib/dot-field-required/dot-field-required.directive.ts +++ b/core-web/libs/ui/src/lib/dot-field-required/dot-field-required.directive.ts @@ -7,8 +7,7 @@ import { FormGroupDirective, Validators } from '@angular/forms'; */ @Directive({ - selector: '[dotFieldRequired]', - standalone: true + selector: '[dotFieldRequired]' }) export class DotFieldRequiredDirective { private el = inject(ElementRef); diff --git a/core-web/libs/ui/src/lib/dot-icon/dot-icon.component.spec.ts b/core-web/libs/ui/src/lib/dot-icon/dot-icon.component.spec.ts index ac896efd5670..72e8ff0c7601 100644 --- a/core-web/libs/ui/src/lib/dot-icon/dot-icon.component.spec.ts +++ b/core-web/libs/ui/src/lib/dot-icon/dot-icon.component.spec.ts @@ -11,7 +11,7 @@ describe('DotIconComponent', () => { beforeEach(() => { TestBed.configureTestingModule({ - declarations: [DotIconComponent] + imports: [DotIconComponent] }).compileComponents(); fixture = TestBed.createComponent(DotIconComponent); diff --git a/core-web/libs/ui/src/lib/dot-icon/dot-icon.component.ts 
b/core-web/libs/ui/src/lib/dot-icon/dot-icon.component.ts index f8748d649187..8cf3701f491a 100644 --- a/core-web/libs/ui/src/lib/dot-icon/dot-icon.component.ts +++ b/core-web/libs/ui/src/lib/dot-icon/dot-icon.component.ts @@ -10,8 +10,7 @@ import { Component, Input } from '@angular/core'; @Component({ selector: 'dot-icon', styleUrls: ['./dot-icon.component.scss'], - templateUrl: './dot-icon.component.html', - standalone: false + templateUrl: './dot-icon.component.html' }) export class DotIconComponent { @Input() name: string; diff --git a/core-web/libs/ui/src/lib/dot-icon/dot-icon.module.ts b/core-web/libs/ui/src/lib/dot-icon/dot-icon.module.ts deleted file mode 100644 index 1244b94ca99a..000000000000 --- a/core-web/libs/ui/src/lib/dot-icon/dot-icon.module.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { CommonModule } from '@angular/common'; -import { NgModule } from '@angular/core'; - -import { DotIconComponent } from './dot-icon.component'; - -@NgModule({ - declarations: [DotIconComponent], - exports: [DotIconComponent], - imports: [CommonModule] -}) -export class DotIconModule {} diff --git a/core-web/libs/ui/src/lib/dot-message/dot-message.pipe.ts b/core-web/libs/ui/src/lib/dot-message/dot-message.pipe.ts index b503f3fbc879..956c73dfb65c 100644 --- a/core-web/libs/ui/src/lib/dot-message/dot-message.pipe.ts +++ b/core-web/libs/ui/src/lib/dot-message/dot-message.pipe.ts @@ -4,7 +4,6 @@ import { DotMessageService } from '@dotcms/data-access'; @Pipe({ name: 'dm', - standalone: true, pure: true }) export class DotMessagePipe implements PipeTransform { diff --git a/core-web/libs/ui/src/lib/dot-remove-confirm-popup/dot-remove-confirm-popup.directive.ts b/core-web/libs/ui/src/lib/dot-remove-confirm-popup/dot-remove-confirm-popup.directive.ts index a38e20610618..03dc195255f8 100644 --- a/core-web/libs/ui/src/lib/dot-remove-confirm-popup/dot-remove-confirm-popup.directive.ts +++ b/core-web/libs/ui/src/lib/dot-remove-confirm-popup/dot-remove-confirm-popup.directive.ts @@ -9,8 +9,7 @@ import { ConfirmationService } from 'primeng/api'; * When the 'Escape' key is pressed, it closes it using the confirmation service. 
*/ @Directive({ - selector: 'p-confirmPopup[dotRemoveConfirmPopupWithEscape]', - standalone: true + selector: 'p-confirmPopup[dotRemoveConfirmPopupWithEscape]' }) export class DotRemoveConfirmPopupWithEscapeDirective { private confirmationService: ConfirmationService = inject(ConfirmationService); diff --git a/core-web/libs/ui/src/lib/dot-site-selector/dot-site-selector.directive.ts b/core-web/libs/ui/src/lib/dot-site-selector/dot-site-selector.directive.ts index 113b06a725cc..fa7a70178973 100644 --- a/core-web/libs/ui/src/lib/dot-site-selector/dot-site-selector.directive.ts +++ b/core-web/libs/ui/src/lib/dot-site-selector/dot-site-selector.directive.ts @@ -11,8 +11,7 @@ import { Site } from '@dotcms/dotcms-js'; @Directive({ selector: '[dotSiteSelector]', - providers: [PaginatorService], - standalone: true + providers: [PaginatorService] }) export class DotSiteSelectorDirective implements OnInit, OnDestroy { private readonly primeDropdown = inject(Dropdown, { optional: true, self: true }); diff --git a/core-web/libs/ui/src/lib/dot-spinner/dot-spinner.component.spec.ts b/core-web/libs/ui/src/lib/dot-spinner/dot-spinner.component.spec.ts index 69817b8514bd..a7d8d1f1dc1e 100644 --- a/core-web/libs/ui/src/lib/dot-spinner/dot-spinner.component.spec.ts +++ b/core-web/libs/ui/src/lib/dot-spinner/dot-spinner.component.spec.ts @@ -2,7 +2,6 @@ import { ComponentFixture, TestBed } from '@angular/core/testing'; import { By } from '@angular/platform-browser'; import { DotSpinnerComponent } from './dot-spinner.component'; -import { DotSpinnerModule } from './dot-spinner.module'; describe('DotSpinnerComponent', () => { let component: DotSpinnerComponent; @@ -10,8 +9,7 @@ describe('DotSpinnerComponent', () => { beforeEach(() => { TestBed.configureTestingModule({ - declarations: [], - imports: [DotSpinnerModule] + imports: [DotSpinnerComponent] }).compileComponents(); fixture = TestBed.createComponent(DotSpinnerComponent); diff --git a/core-web/libs/ui/src/lib/dot-spinner/dot-spinner.component.ts b/core-web/libs/ui/src/lib/dot-spinner/dot-spinner.component.ts index af3d490600d7..e8668a83aed9 100644 --- a/core-web/libs/ui/src/lib/dot-spinner/dot-spinner.component.ts +++ b/core-web/libs/ui/src/lib/dot-spinner/dot-spinner.component.ts @@ -1,10 +1,11 @@ +import { CommonModule } from '@angular/common'; import { Component, Input } from '@angular/core'; @Component({ selector: 'dot-spinner', templateUrl: './dot-spinner.component.html', styleUrls: ['./dot-spinner.component.scss'], - standalone: false + imports: [CommonModule] }) export class DotSpinnerComponent { @Input() borderSize = ''; diff --git a/core-web/libs/ui/src/lib/dot-spinner/dot-spinner.module.ts b/core-web/libs/ui/src/lib/dot-spinner/dot-spinner.module.ts deleted file mode 100644 index 83415be33842..000000000000 --- a/core-web/libs/ui/src/lib/dot-spinner/dot-spinner.module.ts +++ /dev/null @@ -1,12 +0,0 @@ -import { CommonModule } from '@angular/common'; -import { NgModule } from '@angular/core'; - -import { DotSpinnerComponent } from './dot-spinner.component'; - -@NgModule({ - declarations: [DotSpinnerComponent], - imports: [CommonModule], - exports: [DotSpinnerComponent], - providers: [] -}) -export class DotSpinnerModule {} diff --git a/core-web/libs/ui/src/lib/modules/dot-dialog/dot-dialog.component.html b/core-web/libs/ui/src/lib/modules/dot-dialog/dot-dialog.component.html index 2b8e31d563d4..922e97299290 100644 --- a/core-web/libs/ui/src/lib/modules/dot-dialog/dot-dialog.component.html +++ 
b/core-web/libs/ui/src/lib/modules/dot-dialog/dot-dialog.component.html @@ -16,11 +16,11 @@

(click)="close($event)" icon="pi pi-times" styleClass="p-button-rounded p-button-text" - data-testId="close-button"> + data-testId="close-button" /> }
- +
@if (actions) {