fix(scheduler): #152 problem B — persist and surface cron error_detail

Addresses #152 problem B (schedule history API drops error detail); the
issue itself stays open for problem A.

Two tiny changes:

1. scheduler.fireSchedule now writes lastError into activity_logs.error_detail
   when inserting the cron_run row. Previously the column was left NULL even
   on failure because the INSERT didn't include it.

2. schedules.History SELECT now reads error_detail and includes it in the
   JSON response under the error_detail key. The frontend and the audit
   cron can now display why a run failed instead of just "status=error".

No schema change — activity_logs.error_detail already exists from
migration 009. This just starts using the column.

Problem A of #152 (Research Lead ecosystem-watch 50% error rate on its
own) is a separate ops investigation and stays open.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hongming Wang 2026-04-15 11:11:16 -07:00
parent ddce151698
commit ce88a396da
2 changed files with 17 additions and 8 deletions

View File

@ -275,8 +275,12 @@ func (h *ScheduleHandler) History(c *gin.Context) {
workspaceID := c.Param("id")
ctx := c.Request.Context()
// #152: include error_detail in history so UI can show why a run failed.
// activity_logs.error_detail is populated by scheduler.fireSchedule when
// the A2A proxy returns non-2xx or the update SQL reports an error.
rows, err := db.DB.QueryContext(ctx, `
SELECT created_at, duration_ms, status,
COALESCE(error_detail, '') as error_detail,
COALESCE(request_body::text, '{}') as request_body
FROM activity_logs
WHERE workspace_id = $1
@ -292,17 +296,18 @@ func (h *ScheduleHandler) History(c *gin.Context) {
defer rows.Close()
type historyEntry struct {
Timestamp time.Time `json:"timestamp"`
DurationMs *int `json:"duration_ms"`
Status *string `json:"status"`
Request json.RawMessage `json:"request"`
Timestamp time.Time `json:"timestamp"`
DurationMs *int `json:"duration_ms"`
Status *string `json:"status"`
ErrorDetail string `json:"error_detail"`
Request json.RawMessage `json:"request"`
}
entries := make([]historyEntry, 0)
for rows.Next() {
var e historyEntry
var reqStr string
if err := rows.Scan(&e.Timestamp, &e.DurationMs, &e.Status, &reqStr); err != nil {
if err := rows.Scan(&e.Timestamp, &e.DurationMs, &e.Status, &e.ErrorDetail, &reqStr); err != nil {
continue
}
e.Request = json.RawMessage(reqStr)

View File

@ -290,10 +290,14 @@ func (s *Scheduler) fireSchedule(ctx context.Context, sched scheduleRow) {
"cron_expr": sched.CronExpr,
"prompt": truncate(sched.Prompt, 200),
})
// #152: persist lastError into error_detail on the activity_logs row
// so GET /workspaces/:id/schedules/:id/history can surface why a run
// failed (previously dropped — history returned status without any
// error context, making root-cause debugging impossible).
_, _ = db.DB.ExecContext(ctx, `
INSERT INTO activity_logs (workspace_id, activity_type, source_id, method, summary, request_body, status, created_at)
VALUES ($1, 'cron_run', NULL, 'cron', $2, $3::jsonb, $4, now())
`, sched.WorkspaceID, "Cron: "+sched.Name, string(cronMeta), lastStatus)
INSERT INTO activity_logs (workspace_id, activity_type, source_id, method, summary, request_body, status, error_detail, created_at)
VALUES ($1, 'cron_run', NULL, 'cron', $2, $3::jsonb, $4, $5, now())
`, sched.WorkspaceID, "Cron: "+sched.Name, string(cronMeta), lastStatus, lastError)
if s.broadcaster != nil {
s.broadcaster.RecordAndBroadcast(ctx, "CRON_EXECUTED", sched.WorkspaceID, map[string]interface{}{