diff --git a/docs/design/commit-loading-stall/README.md b/docs/design/commit-loading-stall/README.md new file mode 100644 index 0000000000..645fb464a7 --- /dev/null +++ b/docs/design/commit-loading-stall/README.md @@ -0,0 +1,9 @@ +# Commit Loading Stall Investigation + +Tracks research into a frontend issue where commit actions stay in a loading state even though the API responds quickly. + +## Files +- context.md: Background, problem statement, goals, and non-goals. +- plan.md: High-level execution plan and milestones. +- status.md: Current progress, blockers, and decisions. +- research.md: Codebase findings, hypotheses, and references. diff --git a/docs/design/commit-loading-stall/context.md b/docs/design/commit-loading-stall/context.md new file mode 100644 index 0000000000..f1df10f4f4 --- /dev/null +++ b/docs/design/commit-loading-stall/context.md @@ -0,0 +1,19 @@ +# Context + +## Background +- The playground commit flow uses CommitVariantChangesModal to save variant changes and optionally deploy. +- The modal waits for local state to reflect the new revision before closing. + +## Problem Statement +- Users report that committing sometimes takes a very long time or appears stuck despite fast API responses. +- The UI shows a loading spinner until client state updates. + +## Goals +- Identify the UI/state conditions that keep the commit modal loading. +- Ensure commits close promptly while still allowing state to settle when needed. +- Preserve deploy-after-commit and revision selection behavior. + +## Non-goals +- Backend API performance or schema changes. +- Redesigning the commit UX beyond fixing the loading stall. +- Fixing unrelated testset or evaluator commit flows unless evidence surfaces. diff --git a/docs/design/commit-loading-stall/plan.md b/docs/design/commit-loading-stall/plan.md new file mode 100644 index 0000000000..f05c0f860a --- /dev/null +++ b/docs/design/commit-loading-stall/plan.md @@ -0,0 +1,57 @@ +# Plan + +## Phase 0 - Define success criteria +- The UI should stop showing commit loading when the server commit succeeds. +- The UI should never wait without a timeout. If state does not settle, the modal should still close and the UI should recover in the background. +- After commit, the initiating UI should point at the new revision. + - Playground flow: update `selectedVariantsAtom` when committing a selected revision. + - Variant drawer flow: update the `revisionId` query param (and drawer state) to the new revision. + +## Phase 1 - Map all commit flows +- Commit from the main playground (selected revision). +- Commit from the variant drawer (revision may not be in `selectedVariantsAtom`). +- Commit as a new variant (branch). +- Commit with "Deploy after commit" enabled. + +For each flow, record: +- Which ID is the input (revision id vs parent variant id). +- Which ID is the output (new revision id, and sometimes new variant id). +- Which UI state must update (playground selection, drawer URL, or both). + +## Phase 2 - Explain why it can feel slow +- Confirm that `selectedVariantsAtom` is localStorage backed selection state. It does not refresh from the API. +- Measure time spent in each step of the commit flow: + - Commit request itself. + - Query invalidation and refetch (`invalidatePlaygroundQueriesAtom`). + - Polling for the newest revision (`waitForNewRevisionAfterMutationAtom`). + +## Phase 3 - Fix design (first principles) +### Choose the right completion signal +- Primary signal: the commit mutation resolves successfully. +- Optional secondary signal: the new revision becomes visible in the revisions list. Only wait for this within a short time budget. +- Do not gate modal close on `selectedVariantsAtom`. Selection is view state, not commit state. + +### Make state settle work asynchronous +- Close the modal as soon as the commit is persisted (and deployment finishes if selected). +- Continue query invalidation and UI updates in the background. +- If the UI cannot determine the new revision id quickly, close anyway and show a success message that versions are refreshing. + +### Keep the initiating context in sync +- If commit starts from the variant drawer, update the drawer URL to the new revision id on success. +- If commit starts from the playground selection, swap the selected revision to the new revision id. + +### Reduce unnecessary waiting +- Avoid awaiting a full refetch of multiple queries in the commit path. +- If we need a new revision id, prefer a targeted fetch for that variant's revisions and pick the newest. +- If the backend can return the new revision id in the commit response, use it and skip polling. + +## Phase 4 - Implementation and validation +- Update `CommitVariantChangesModal` so it always clears loading based on mutation completion (with a timeout fallback). +- Update the variant drawer commit entry point so it navigates to the new revision id after commit. +- Reduce or remove blocking waits on query refetch. +- Validate these scenarios: + - Commit from the playground (selected revision). + - Commit from the variant drawer (non selected revision). + - Commit in comparison view (multiple selected revisions). + - Commit as a new variant. + - Commit with deploy after commit enabled. diff --git a/docs/design/commit-loading-stall/research.md b/docs/design/commit-loading-stall/research.md new file mode 100644 index 0000000000..4e5a33c07f --- /dev/null +++ b/docs/design/commit-loading-stall/research.md @@ -0,0 +1,61 @@ +# Research + +## What the UI currently waits on +The commit modal keeps showing a loading state after a successful commit because it waits for a client side selection update. + +In `web/oss/src/components/Playground/Components/Modals/CommitVariantChangesModal/index.tsx`, the modal: +- Sets `isMutating=true` when you click Commit. +- Runs the commit mutation. +- Sets `waitForRevisionId` to the new revision id. +- Closes only when `selectedVariantsAtom` contains `waitForRevisionId`. + +If that selection update never happens, `isMutating` stays true and the modal keeps loading. + +## Why `selectedVariantsAtom` does not "refresh" +`selectedVariantsAtom` is not server data. It is local UI state. + +It is stored in localStorage under `agenta_selected_revisions_v2` and is scoped per app. +This means it only changes when the UI explicitly sets it. + +Refs: `web/oss/src/components/Playground/state/atoms/core.ts`. + +## Why the modal can hang even when the API is fast +The wait condition uses selection state, not commit state. + +`saveVariantMutationAtom` updates `selectedVariantsAtom` by mapping existing selected ids. +This only changes selection when the committed revision id was already selected. + +If you commit a revision that is not currently selected, the mutation can succeed but selection stays unchanged. +The modal then waits forever for an update that will never occur. + +Refs: `web/oss/src/components/Playground/state/atoms/variantCrud.ts`. + +There is also a second source of slowness. +Even in the normal path, the mutation may spend time waiting for the revisions list to refresh. +It invalidates and refetches several TanStack Query keys, then polls for a new revision id with a 10 to 15 second timeout. + +Refs: `web/oss/src/components/Playground/state/atoms/queries.ts`. + +## Entry points where selection can differ +- Commit modal is used in the variant drawer header, which can target a revision that is not in selectedVariantsAtom. +- When committing from that drawer, the modal waits on playground selection even though the drawer selection is independent. +Refs: `web/oss/src/components/VariantsComponents/Drawers/VariantDrawer/assets/VariantDrawerTitle/index.tsx`, `web/oss/src/components/Playground/Components/Modals/CommitVariantChangesModal/assets/CommitVariantChangesButton/index.tsx`. + +## Hypothesis +- The loading "eternity" occurs when committing a revision not currently in selectedVariantsAtom, leaving the commit modal waiting forever for a selection update that never happens. +- A secondary delay can come from the waitForNewRevisionAfterMutationAtom timeout (10-15s), even when API calls return quickly. + +## What could break if we close the modal earlier +Closing the modal on mutation success is safer than waiting on selection, but it can change timing. + +These are the main risks: +- The UI can keep showing the old revision for a short time, because the revisions list refetch can lag. +- The drawer can stay pointed at the old revision if we do not update the `revisionId` URL param. +- The playground can stay pointed at the old revision if we do not swap the selected revision id. +- The user can click Commit again if the dirty state does not clear quickly enough. +- Deploy after commit needs a stable revision id. If we close before we have it, we need to ensure the deploy call uses the correct id. + +Mitigations: +- Update the initiating context pointer (drawer URL or playground selection) on success. +- Add a bounded settle wait (short timeout) when we want to avoid UI flicker. +- Show a toast when we close before the revisions list catches up. diff --git a/docs/design/commit-loading-stall/status.md b/docs/design/commit-loading-stall/status.md new file mode 100644 index 0000000000..786f689629 --- /dev/null +++ b/docs/design/commit-loading-stall/status.md @@ -0,0 +1,16 @@ +# Status + +Last updated: 2026-01-21 + +## Current state +- Implemented a bounded settle wait. The modal now closes on commit success unless the committed revision was selected and needs a short selection swap. +- Added a 1.5 second timeout so the modal never waits indefinitely. +- The variant drawer now updates the `revisionId` query param on commit success so it points at the new revision. + +## Open questions +- Which settle time budget feels right (for example 1 to 2 seconds) before we close and let refresh happen in the background. +- Whether we want to keep the modal open through the deploy step, or close and show a separate deployment status message. + +## Next actions +- Validate commit flows (playground, variant drawer, new variant, deploy after commit). +- Decide if we want a success toast when we close before the revisions list finishes refreshing. diff --git a/web/oss/src/components/Playground/Components/Modals/CommitVariantChangesModal/index.tsx b/web/oss/src/components/Playground/Components/Modals/CommitVariantChangesModal/index.tsx index db7acd0fb1..1cd3e5b676 100644 --- a/web/oss/src/components/Playground/Components/Modals/CommitVariantChangesModal/index.tsx +++ b/web/oss/src/components/Playground/Components/Modals/CommitVariantChangesModal/index.tsx @@ -1,4 +1,4 @@ -import {type ReactElement, useCallback, useEffect, useMemo, useState} from "react" +import {type ReactElement, useCallback, useEffect, useMemo, useRef, useState} from "react" import {FloppyDiskBack} from "@phosphor-icons/react" import {useAtomValue, useSetAtom} from "jotai" @@ -55,11 +55,18 @@ const CommitVariantChangesModal: React.FC = ({ const [selectedEnvironment, setSelectedEnvironment] = useState(null) const [modalSize, setModalSize] = useState({width: 960, height: 640}) const [viewport, setViewport] = useState({width: 0, height: 0}) + const settleTimeoutRef = useRef | null>(null) + + const SETTLE_TIMEOUT_MS = 1500 const onClose = useCallback(() => { onCancel?.({} as any) setIsMutating(false) setWaitForRevisionId(undefined) + if (settleTimeoutRef.current) { + clearTimeout(settleTimeoutRef.current) + settleTimeoutRef.current = null + } setSelectedCommitType({ type: "version", }) @@ -116,11 +123,38 @@ const CommitVariantChangesModal: React.FC = ({ // Wait for the state to reflect the new revision if (selectedRevisionIds?.includes(waitForRevisionId)) { - setIsMutating(false) + if (settleTimeoutRef.current) { + clearTimeout(settleTimeoutRef.current) + settleTimeoutRef.current = null + } onClose() } }, [selectedRevisionIds, waitForRevisionId, onClose]) + useEffect(() => { + return () => { + if (settleTimeoutRef.current) { + clearTimeout(settleTimeoutRef.current) + settleTimeoutRef.current = null + } + } + }, []) + + const startSettleWait = useCallback( + (nextRevisionId: string) => { + if (!nextRevisionId) return + setWaitForRevisionId(nextRevisionId) + if (settleTimeoutRef.current) { + clearTimeout(settleTimeoutRef.current) + } + settleTimeoutRef.current = setTimeout(() => { + settleTimeoutRef.current = null + onClose() + }, SETTLE_TIMEOUT_MS) + }, + [onClose], + ) + const handleDeployAfterCommit = useCallback( async (resultVariant?: any) => { if (!shouldDeploy || !selectedEnvironment) { @@ -179,9 +213,13 @@ const CommitVariantChangesModal: React.FC = ({ await handleDeployAfterCommit(result.variant) // Wait for the state to settle with the new revision - if (result.variant?.id) { - setWaitForRevisionId(result.variant.id) - return // Don't close yet, let useEffect handle it + const nextRevisionId = result.variant?.id + const isAlreadySelected = + nextRevisionId && selectedRevisionIds?.includes(nextRevisionId) + const wasSelected = selectedRevisionIds?.includes(variantId) + if (nextRevisionId && wasSelected && !isAlreadySelected) { + startSettleWait(nextRevisionId) + return // Let selection settle or timeout close } } } else if (selectedCommitType?.type === "variant" && selectedCommitType?.name) { @@ -210,20 +248,21 @@ const CommitVariantChangesModal: React.FC = ({ await handleDeployAfterCommit(result.variant) // Wait for the state to settle with the new revision (same as version commits) - if (newRevisionId) { - setWaitForRevisionId(newRevisionId) - return // Don't close yet, let useEffect handle it + const isAlreadySelected = + newRevisionId && selectedRevisionIds?.includes(newRevisionId) + const wasSelected = selectedRevisionIds?.includes(variantId) + if (newRevisionId && wasSelected && !isAlreadySelected) { + startSettleWait(newRevisionId) + return // Let selection settle or timeout close } } } // If we get here without setting a wait state, close immediately - setIsMutating(false) onClose() } catch (error) { console.error("Failed to commit variant changes:", error) message.error("We couldn't save your changes. Please try again.") - setIsMutating(false) onClose() } }, [ @@ -237,6 +276,8 @@ const CommitVariantChangesModal: React.FC = ({ commitType, handleDeployAfterCommit, onClose, + selectedRevisionIds, + startSettleWait, ]) const isOkDisabled = diff --git a/web/oss/src/components/VariantsComponents/Drawers/VariantDrawer/assets/VariantDrawerTitle/index.tsx b/web/oss/src/components/VariantsComponents/Drawers/VariantDrawer/assets/VariantDrawerTitle/index.tsx index 869a3c4bec..2fa4405889 100644 --- a/web/oss/src/components/VariantsComponents/Drawers/VariantDrawer/assets/VariantDrawerTitle/index.tsx +++ b/web/oss/src/components/VariantsComponents/Drawers/VariantDrawer/assets/VariantDrawerTitle/index.tsx @@ -143,6 +143,7 @@ const TitleActions = memo( variants, isLoading, }: Pick) => { + const [, updateQuery] = useQuery("replace") const appStatus = useAtomValue(currentVariantAppStatusAtom) const selectedVariant = useAtomValue(variantByRevisionIdAtomFamily(variantId)) as any const isDirty = useAtomValue(variantIsDirtyAtomFamily(variantId)) @@ -182,6 +183,10 @@ const TitleActions = memo( size="small" disabled={!isDirty || isLoading} commitType={viewAs} + onSuccess={({revisionId}) => { + if (!revisionId) return + updateQuery({revisionId, drawerType: "variant"}) + }} /> )