Retry Failed Task

Overview

The RetryFailedTaskCommand re-executes a failed task, optionally with updated variables. Use it when a task has failed due to a temporary issue (network timeout, service unavailable, etc.) that is likely to clear on retry.

Automatic Retry

This command manually retries a failed task. For automatic retry logic, configure retry policies in your task definitions.

API Endpoint

POST /api/core/cmd

Headers

Content-Type: application/json
Authorization: Bearer {access_token}
X-Tenant-ID: {tenant_id}

Request Structure

{
  "cmd": "RetryFailedTaskCommand",
  "data": {
    "instanceGuid": "abc-123-def-456",
    "updatedVariables": {
      "retryCount": 2,
      "timeout": 10000
    },
    "comment": "Retrying after increasing timeout value"
  }
}

Request Fields

Field             Type    Required  Description
instanceGuid      string  Yes       Unique identifier of the process instance containing the failed task
updatedVariables  object  No        Variables to update in the process context before retrying the task
comment           string  No        Reason for the retry (stored in the audit trail)
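
For reference, here is a minimal sketch of sending this command from JavaScript with the endpoint and headers documented above. ACCESS_TOKEN and TENANT_ID are placeholders you supply yourself; the use-case snippets further down assume a wrapper of roughly this shape.

// Placeholder credentials - supply your own access token and tenant id
const ACCESS_TOKEN = '...';
const TENANT_ID = '...';

async function retryFailedTask(data) {
  // POST the command envelope to the documented command endpoint
  const response = await fetch('/api/core/cmd', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${ACCESS_TOKEN}`,
      'X-Tenant-ID': TENANT_ID
    },
    body: JSON.stringify({
      cmd: 'RetryFailedTaskCommand',
      data
    })
  });
  return await response.json();
}

// Example:
// await retryFailedTask({
//   instanceGuid: 'abc-123-def-456',
//   updatedVariables: { timeout: 10000 },
//   comment: 'Retrying after increasing timeout value'
// });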

When to Use

✅ Good Use Cases

  • Temporary Network Issues: API call timed out or connection failed
  • Service Temporarily Unavailable: External service returned 503 error
  • Rate Limiting: Service returned 429 Too Many Requests
  • Transient Database Lock: Deadlock or lock timeout that may resolve
  • Updated Configuration: Retry with corrected parameters

❌ Bad Use Cases

  • Logic Errors: Code bugs or incorrect business logic
  • Invalid Data: Data validation failures
  • Permanent Failures: 404 Not Found, 401 Unauthorized
  • Resource Does Not Exist: File or record doesn't exist
  • Configuration Errors: Wrong endpoint or missing credentials

Sample Requests

1. Retry After Network Timeout

{
  "cmd": "RetryFailedTaskCommand",
  "data": {
    "instanceGuid": "loan-app-2024-001",
    "updatedVariables": {
      "timeout": 30000,
      "retryAttempt": 2
    },
    "comment": "Network timeout on first attempt - increasing timeout to 30s"
  }
}

2. Retry with Corrected Data

{
  "cmd": "RetryFailedTaskCommand",
  "data": {
    "instanceGuid": "kyc-process-456",
    "updatedVariables": {
      "apiEndpoint": "https://api-backup.example.com/verify",
      "retryReason": "primary_endpoint_unavailable"
    },
    "comment": "Primary API endpoint down - retrying with backup endpoint"
  }
}

3. Simple Retry Without Changes

{
  "cmd": "RetryFailedTaskCommand",
  "data": {
    "instanceGuid": "payment-789",
    "comment": "Service temporarily unavailable - retrying immediately"
  }
}

Response Structure

Successfully Retried and Continued

{
  "isSuccessful": true,
  "message": "Task retried successfully and process continued.",
  "statusCode": "00",
  "data": {
    "instanceGuid": "loan-app-2024-001",
    "status": "running",
    "currentActivityId": "Task_AssessRisk",
    "retriedTask": {
      "taskId": "Task_CreditCheck",
      "taskName": "Perform Credit Check",
      "previousAttempts": 1,
      "retriedAt": "2024-12-19T10:35:00Z",
      "retryComment": "Network timeout on first attempt - increasing timeout to 30s"
    },
    "result": {
      "message": "Task execution successful",
      "taskExecutionTime": 2500,
      "waiting": false
    },
    "state": {
      "variables": {
        "loanId": 789,
        "customerId": 456,
        "creditScore": 720,
        "timeout": 30000,
        "retryAttempt": 2,
        "creditCheckSuccess": true
      },
      "currentNode": "Task_AssessRisk",
      "completedTasks": [
        "StartEvent",
        "Task_ValidateCustomer",
        "Task_CreditCheck"
      ]
    }
  }
}

Retried But Failed Again

{
  "isSuccessful": false,
  "message": "Task retry failed.",
  "statusCode": "99",
  "data": {
    "instanceGuid": "loan-app-2024-001",
    "status": "error",
    "error": {
      "taskId": "Task_CreditCheck",
      "taskName": "Perform Credit Check",
      "errorMessage": "Credit bureau API still unavailable - 503 Service Unavailable",
      "attemptNumber": 2,
      "lastAttemptAt": "2024-12-19T10:35:00Z"
    },
    "nextActions": [
      {
        "action": "retry",
        "label": "Retry Again",
        "command": "RetryFailedTaskCommand"
      },
      {
        "action": "skip",
        "label": "Skip This Task",
        "command": "SkipFailedTaskCommand"
      },
      {
        "action": "cancel",
        "label": "Cancel Process",
        "command": "CancelProcessInstanceCommand"
      }
    ]
  }
}

Retried and Waiting at Next UserTask

{
  "isSuccessful": true,
  "message": "Task retried successfully. Process waiting at next UserTask.",
  "statusCode": "00",
  "data": {
    "instanceGuid": "loan-app-2024-001",
    "status": "waiting",
    "currentActivityId": "Task_ApprovalDecision",
    "retriedTask": {
      "taskId": "Task_CreditCheck",
      "taskName": "Perform Credit Check",
      "retriedAt": "2024-12-19T10:35:00Z"
    },
    "currentTask": {
      "taskId": "Task_ApprovalDecision",
      "taskName": "Loan Approval Decision",
      "taskType": "UserTask"
    },
    "state": {
      "variables": {
        "creditScore": 720,
        "retrySuccessful": true
      }
    },
    "nextActions": [
      {
        "action": "approve",
        "label": "Approve Loan"
      },
      {
        "action": "reject",
        "label": "Reject Loan"
      }
    ]
  }
}
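
A retry therefore ends in one of three states: the process keeps running, it waits at the next UserTask, or the task fails again with suggested nextActions. A minimal sketch of branching on these outcomes, assuming a retryFailedTask(data) wrapper like the one sketched earlier:

async function handleRetryOutcome(instanceGuid) {
  const response = await retryFailedTask({ instanceGuid, comment: "Manual retry" });

  if (!response.isSuccessful) {
    // Task failed again - surface the suggested follow-up commands to the operator
    const options = (response.data?.nextActions || []).map(a => a.label).join(", ");
    console.error(`Retry failed: ${response.data?.error?.errorMessage}. Options: ${options}`);
    return response;
  }

  if (response.data.status === "waiting") {
    // Process is paused at the next UserTask - hand off to the task UI
    console.log(`Waiting at ${response.data.currentTask.taskName}`);
  } else {
    // status === "running": the engine carried on past the retried task
    console.log(`Process continued at ${response.data.currentActivityId}`);
  }
  return response;
}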

Error Responses

Process Not in Error State

{
  "isSuccessful": false,
  "message": "Process instance is not in error state. Current state: running",
  "statusCode": "99",
  "data": {
    "currentStatus": "running"
  }
}

Process Not Found

{
  "isSuccessful": false,
  "message": "Process instance 'invalid-guid' not found.",
  "statusCode": "99",
  "data": null
}
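
Both rejections return statusCode "99" but can be told apart from the body: data is null when the instance was not found, while data.currentStatus reports the actual state when the instance is simply not in error. A small illustrative helper (the function name is hypothetical):

function explainRetryRejection(response) {
  if (response.isSuccessful) return null;

  if (response.data === null) {
    // Matches "Process instance '...' not found."
    return "Instance not found - check the instanceGuid";
  }
  if (response.data.currentStatus && response.data.currentStatus !== "error") {
    // Matches "Process instance is not in error state."
    return `Instance is '${response.data.currentStatus}' - only instances in error state can be retried`;
  }
  return response.message;
}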

Retry Strategy Flow

Use Cases

1. Exponential Backoff Retry

class RetryManager {
  async retryWithBackoff(instanceGuid, maxRetries = 5) {
    const delays = [1000, 2000, 5000, 10000, 30000]; // Backoff delays between attempts (roughly exponential)

    for (let attempt = 0; attempt < maxRetries; attempt++) {
      // Wait before retry (except first attempt)
      if (attempt > 0) {
        const delay = delays[attempt - 1];
        console.log(`Waiting ${delay}ms before retry attempt ${attempt + 1}`);
        await this.sleep(delay);
      }

      try {
        const response = await this.retryFailedTask({
          instanceGuid,
          updatedVariables: {
            retryAttempt: attempt + 1,
            retryTimestamp: new Date().toISOString()
          },
          comment: `Retry attempt ${attempt + 1} of ${maxRetries}`
        });

        if (response.isSuccessful) {
          console.log(`Retry successful on attempt ${attempt + 1}`);
          return response;
        }

        console.warn(`Retry attempt ${attempt + 1} failed:`, response.message);

      } catch (error) {
        console.error(`Retry attempt ${attempt + 1} error:`, error.message);
      }
    }

    throw new Error(`All ${maxRetries} retry attempts failed`);
  }

  sleep(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  async retryFailedTask(data) {
    // Authorization and X-Tenant-ID headers (see Headers section) omitted here for brevity
    return await fetch('/api/core/cmd', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        cmd: 'RetryFailedTaskCommand',
        data
      })
    }).then(r => r.json());
  }
}

// Usage
const retryManager = new RetryManager();
await retryManager.retryWithBackoff('loan-app-2024-001');

2. Retry with Updated Configuration

async function retryWithFallback(instanceGuid, failedTask) {
  const state = await getProcessState(instanceGuid);
  const variables = state.data.state.variables;

  // Try primary endpoint first
  if (variables.retryAttempt === undefined || variables.retryAttempt < 2) {
    return await retryFailedTask({
      instanceGuid,
      updatedVariables: {
        retryAttempt: (variables.retryAttempt || 0) + 1,
        timeout: 15000 // Increase timeout
      },
      comment: "Retrying with increased timeout"
    });
  }

  // Switch to backup endpoint after 2 failures
  if (variables.retryAttempt < 4) {
    return await retryFailedTask({
      instanceGuid,
      updatedVariables: {
        apiEndpoint: "https://backup-api.example.com",
        retryAttempt: variables.retryAttempt + 1,
        timeout: 20000
      },
      comment: "Switching to backup endpoint"
    });
  }

  // After 4 attempts, give up and skip
  console.warn("Max retries exceeded, skipping task");
  return await skipFailedTask({
    instanceGuid,
    comment: "All retry attempts exhausted - manual intervention required"
  });
}

3. Conditional Retry Based on Error Type

async function intelligentRetry(instanceGuid) {
  const state = await getProcessState(instanceGuid);

  if (state.data.status !== "error") {
    throw new Error("Process is not in error state");
  }

  const error = state.data.error;
  const errorType = classifyError(error.errorMessage);

  switch (errorType) {
    case "NETWORK_TIMEOUT":
    case "SERVICE_UNAVAILABLE":
      return await retryWithBackoff(instanceGuid);

    case "RATE_LIMITED":
      await waitForRateLimitReset();
      return await retryFailedTask({ instanceGuid });

    case "AUTHENTICATION_ERROR": {
      // Refresh credentials
      const newToken = await refreshAuthToken();
      return await retryFailedTask({
        instanceGuid,
        updatedVariables: {
          authToken: newToken
        }
      });
    }

    case "INVALID_DATA":
    case "LOGIC_ERROR":
      // Don't retry - these need manual intervention
      throw new Error(`Cannot retry ${errorType}: ${error.errorMessage}`);

    default:
      // Unknown error - try once with no changes
      return await retryFailedTask({
        instanceGuid,
        comment: "Unknown error type - single retry attempt"
      });
  }
}

function classifyError(errorMessage) {
  if (errorMessage.includes("timeout") || errorMessage.includes("ETIMEDOUT")) {
    return "NETWORK_TIMEOUT";
  }
  if (errorMessage.includes("503") || errorMessage.includes("unavailable")) {
    return "SERVICE_UNAVAILABLE";
  }
  if (errorMessage.includes("429") || errorMessage.includes("rate limit")) {
    return "RATE_LIMITED";
  }
  if (errorMessage.includes("401") || errorMessage.includes("unauthorized")) {
    return "AUTHENTICATION_ERROR";
  }
  if (errorMessage.includes("validation") || errorMessage.includes("invalid")) {
    return "INVALID_DATA";
  }
  return "UNKNOWN";
}

4. Retry with Circuit Breaker Pattern

class CircuitBreaker {
  constructor(maxFailures = 3, resetTimeout = 60000) {
    this.maxFailures = maxFailures;
    this.resetTimeout = resetTimeout;
    this.failures = 0;
    this.state = "CLOSED"; // CLOSED, OPEN, HALF_OPEN
    this.nextAttempt = Date.now();
  }

  async retry(instanceGuid) {
    if (this.state === "OPEN") {
      if (Date.now() < this.nextAttempt) {
        throw new Error("Circuit breaker is OPEN - retry not allowed yet");
      }
      // Try to close circuit
      this.state = "HALF_OPEN";
    }

    try {
      const response = await retryFailedTask({ instanceGuid });

      if (response.isSuccessful) {
        // Success - reset circuit breaker
        this.failures = 0;
        this.state = "CLOSED";
        return response;
      }

      // Task retried but failed again - let the catch block record the failure once
      throw new Error("Retry failed");

    } catch (error) {
      this.handleFailure();
      throw error;
    }
  }

  handleFailure() {
    this.failures++;

    if (this.failures >= this.maxFailures) {
      this.state = "OPEN";
      this.nextAttempt = Date.now() + this.resetTimeout;
      console.warn(`Circuit breaker OPEN until ${new Date(this.nextAttempt)}`);
    }
  }

  reset() {
    this.failures = 0;
    this.state = "CLOSED";
  }
}

// Usage
const circuitBreaker = new CircuitBreaker();

try {
  await circuitBreaker.retry('loan-app-2024-001');
} catch (error) {
  if (circuitBreaker.state === "OPEN") {
    console.error("Service is down - circuit breaker open");
    // Maybe skip the task or alert operations
  }
}

Best Practices

1. Implement Maximum Retry Limits

const MAX_RETRIES = 5;

async function retryWithLimit(instanceGuid) {
  const state = await getProcessState(instanceGuid);
  const retryCount = state.data.state.variables.retryCount || 0;

  if (retryCount >= MAX_RETRIES) {
    throw new Error(`Maximum retry limit (${MAX_RETRIES}) reached`);
  }

  return await retryFailedTask({
    instanceGuid,
    updatedVariables: {
      retryCount: retryCount + 1,
      lastRetryAt: new Date().toISOString()
    },
    comment: `Retry attempt ${retryCount + 1} of ${MAX_RETRIES}`
  });
}

2. Log All Retry Attempts

async function retryWithLogging(instanceGuid, updatedVariables, comment) {
  const startTime = Date.now();

  console.log(`[RETRY] Starting retry for instance ${instanceGuid}`);
  console.log(`[RETRY] Reason: ${comment}`);
  console.log(`[RETRY] Updated variables:`, updatedVariables);

  try {
    const response = await retryFailedTask({
      instanceGuid,
      updatedVariables,
      comment
    });

    const duration = Date.now() - startTime;

    if (response.isSuccessful) {
      console.log(`[RETRY] Success after ${duration}ms`);
      await auditLog.record({
        action: "RETRY_SUCCESS",
        instanceGuid,
        duration,
        comment
      });
    } else {
      console.error(`[RETRY] Failed after ${duration}ms:`, response.message);
      await auditLog.record({
        action: "RETRY_FAILED",
        instanceGuid,
        duration,
        error: response.message
      });
    }

    return response;

  } catch (error) {
    const duration = Date.now() - startTime;
    console.error(`[RETRY] Error after ${duration}ms:`, error);
    await auditLog.record({
      action: "RETRY_ERROR",
      instanceGuid,
      duration,
      error: error.message
    });
    throw error;
  }
}

3. Use Idempotent Operations

// Ensure retry is safe even if task partially succeeded
async function retryIdempotently(instanceGuid) {
  const state = await getProcessState(instanceGuid);
  const variables = state.data.state.variables;

  // Generate unique operation ID for idempotency
  const operationId = variables.operationId || generateUUID();

  return await retryFailedTask({
    instanceGuid,
    updatedVariables: {
      operationId, // Same ID used on retry
      retryTimestamp: new Date().toISOString()
    },
    comment: "Retry with idempotency key"
  });
}

4. Monitor Retry Patterns

class RetryMonitor {
  constructor() {
    this.retryStats = {};
  }

  async retryWithMonitoring(instanceGuid, taskId) {
    const key = `${instanceGuid}:${taskId}`;

    if (!this.retryStats[key]) {
      this.retryStats[key] = {
        attempts: 0,
        firstAttempt: new Date(),
        lastAttempt: null,
        successes: 0,
        failures: 0
      };
    }

    const stats = this.retryStats[key];
    stats.attempts++;
    stats.lastAttempt = new Date();

    const response = await retryFailedTask({ instanceGuid });

    if (response.isSuccessful) {
      stats.successes++;
    } else {
      stats.failures++;
    }

    // Alert if too many retries
    if (stats.failures > 3) {
      await this.alertHighFailureRate(key, stats);
    }

    return response;
  }

  async alertHighFailureRate(key, stats) {
    console.warn(`High failure rate for ${key}:`, stats);
    // Send alert to operations team
  }
}

Notes

  • Only works when process is in "error" state
  • Updated variables are merged into process context before retry
  • Task is re-executed completely from the start
  • Previous task execution data is not preserved
  • Comment is stored in audit logs
  • Consider exponential backoff for retries
  • Implement circuit breaker pattern for repeated failures
  • Some errors should not be retried (logic errors, invalid data)
  • Always set maximum retry limits
  • Use idempotent operations when possible
  • Monitor retry patterns to identify systemic issues