Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Use of JS7 - Job Resources to specify mail parameterization is encouraged.

Health Status Check

The health status check performed by the MonitoringJob makes use of the JS7 REST API

  • to retrieve health status information about the JS7 environment,
  • to write this information to a report file,
  • to evaluate if the information indicates a healthy JS7 environment.

Report File

Find a sample report file for download that indicates an alert: monitor.2022-08-17.09-16-44.9Z.alert.json

Code Block
title: Sample Report File
collapse: true
{
  "controllerStatus" : {
    "active" : {
      "id" : 3,
      "surveyDate" : "2022-08-17T08:57:43.000+00:00",
      "controllerId" : "testsuite",
      "title" : "SECONDARY CONTROLLER",
      "host" : "controller-2-0-secondary",
      "url" : "https://controller-2-0-secondary:4443",
      "clusterUrl" : "https://controller-2-0-secondary:4443",
      "role" : "BACKUP",
      "isCoupled" : false,
      "startedAt" : "2022-08-16T18:09:27.000+00:00",
      "version" : "2.5.0-SNAPSHOT+fd0eb39",
      "javaVersion" : "17.0.4+8-alpine-r0",
      "os" : {
        "name" : "Linux",
        "architecture" : "amd64",
        "distribution" : "3.10.0-957.1.3.el7.x86_64"
      },
      "securityLevel" : "MEDIUM"
    },
    "volatileStatus" : {
      "id" : 2,
      "surveyDate" : "2022-08-17T09:16:45.064+00:00",
      "controllerId" : "testsuite",
      "title" : "PRIMARY CONTROLLER",
      "host" : "controller-2-0-primary",
      "url" : "https://controller-2-0-primary:4443",
      "clusterUrl" : "https://controller-2-0-primary:4443",
      "role" : "PRIMARY",
      "isCoupled" : true,
      "startedAt" : "2022-08-16T18:09:26.004+00:00",
      "version" : "2.5.0-SNAPSHOT+fd0eb39",
      "javaVersion" : "17.0.4+8-alpine-r0",
      "os" : {
        "name" : "Linux",
        "architecture" : "amd64",
        "distribution" : "3.10.0-957.1.3.el7.x86_64"
      },
      "securityLevel" : "MEDIUM",
      "componentState" : {
        "severity" : 0,
        "_text" : "operational"
      },
      "connectionState" : {
        "severity" : 0,
        "_text" : "established"
      },
      "clusterNodeState" : {
        "severity" : 0,
        "_text" : "active"
      }
    },
    "permanentStatus" : {
      "id" : 2,
      "surveyDate" : "2022-08-16T18:12:47.169+00:00",
      "controllerId" : "testsuite",
      "title" : "PRIMARY CONTROLLER",
      "host" : "controller-2-0-primary",
      "url" : "https://controller-2-0-primary:4443",
      "clusterUrl" : "https://controller-2-0-primary:4443",
      "role" : "PRIMARY",
      "startedAt" : "2022-08-16T18:09:26.004+00:00",
      "version" : "2.5.0-SNAPSHOT+fd0eb39",
      "javaVersion" : "17.0.4+8-alpine-r0",
      "os" : {
        "name" : "Linux",
        "architecture" : "amd64",
        "distribution" : "3.10.0-957.1.3.el7.x86_64"
      }
    }
  },
  "jocStatus" : {
    "active" : {
      "id" : 2,
      "memberId" : "joc-2-0-primary:97c88ccc3975703ebd0b7277d394ec8768f88b31775e8df038572d2547c240a0",
      "title" : "PRIMARY JOC COCKPIT",
      "current" : true,
      "host" : "joc-2-0-primary",
      "url" : "https://joc-2-0-primary:4443",
      "startedAt" : "2022-08-16T18:10:27.000+00:00",
      "version" : "2.5.0-SNAPSHOT",
      "connectionState" : {
        "severity" : 0,
        "_text" : "established"
      },
      "componentState" : {
        "severity" : 0,
        "_text" : "operational"
      },
      "clusterNodeState" : {
        "severity" : 0,
        "_text" : "active"
      },
      "controllerConnectionStates" : [ {
        "role" : "PRIMARY",
        "state" : {
          "severity" : 0,
          "_text" : "established"
        }
      }, {
        "role" : "BACKUP",
        "state" : {
          "severity" : 0,
          "_text" : "established"
        }
      } ],
      "os" : {
        "name" : "Linux",
        "architecture" : "amd64",
        "distribution" : "3.10.0-957.1.3.el7.x86_64"
      },
      "securityLevel" : "MEDIUM",
      "lastHeartbeat" : "2022-08-17T09:16:37.000+00:00"
    },
    "passive" : [ {
      "id" : 1,
      "memberId" : "joc-2-0-secondary:97c88ccc3975703ebd0b7277d394ec8768f88b31775e8df038572d2547c240a0",
      "title" : "SECONDARY JOC COCKPIT",
      "current" : false,
      "host" : "joc-2-0-secondary",
      "url" : "https://joc-2-0-secondary.sos:7543",
      "startedAt" : "2022-08-16T18:10:27.000+00:00",
      "version" : "2.5.0-SNAPSHOT",
      "connectionState" : {
        "severity" : 0,
        "_text" : "established"
      },
      "componentState" : {
        "severity" : 0,
        "_text" : "operational"
      },
      "clusterNodeState" : {
        "severity" : 1,
        "_text" : "inactive"
      },
      "controllerConnectionStates" : [ {
        "role" : "PRIMARY",
        "state" : {
          "severity" : 0,
          "_text" : "established"
        }
      }, {
        "role" : "BACKUP",
        "state" : {
          "severity" : 0,
          "_text" : "established"
        }
      } ],
      "os" : {
        "name" : "Linux",
        "architecture" : "amd64",
        "distribution" : "3.10.0-957.1.3.el7.x86_64"
      },
      "securityLevel" : "MEDIUM",
      "lastHeartbeat" : "2022-08-17T09:16:37.000+00:00"
    } ]
  },
  "agentStatus" : [ {
    "subagents" : [ ],
    "controllerId" : "testsuite",
    "agentId" : "agent_001",
    "agentName" : "primaryAgent",
    "url" : "https://agent-2-0-primary:4443",
    "version" : "2.5.0-SNAPSHOT",
    "state" : {
      "severity" : 0,
      "_text" : "COUPLED"
    },
    "healthState" : {
      "severity" : 0,
      "_text" : "ALL_SUBAGENTS_ARE_COUPLED_AND_ENABLED"
    },
    "orders" : [ ],
    "runningTasks" : 1,
    "isClusterWatcher" : true,
    "disabled" : false
  }, {
    "subagents" : [ ],
    "controllerId" : "testsuite",
    "agentId" : "agent_002",
    "agentName" : "secondaryAgent",
    "url" : "https://agent-2-0-secondary:4443",
    "version" : "2.5.0-SNAPSHOT",
    "state" : {
      "severity" : 0,
      "_text" : "COUPLED"
    },
    "healthState" : {
      "severity" : 0,
      "_text" : "ALL_SUBAGENTS_ARE_COUPLED_AND_ENABLED"
    },
    "orders" : [ ],
    "runningTasks" : 0,
    "isClusterWatcher" : false,
    "disabled" : false
  }, {
    "subagents" : [ ],
    "controllerId" : "testsuite",
    "agentId" : "agent_004",
    "agentName" : "wintestAgent",
    "url" : "http://192.11.0.146:4245",
    "version" : "2.4.0",
    "state" : {
      "severity" : 0,
      "_text" : "COUPLED"
    },
    "healthState" : {
      "severity" : 0,
      "_text" : "ALL_SUBAGENTS_ARE_COUPLED_AND_ENABLED"
    },
    "orders" : [ ],
    "runningTasks" : 0,
    "isClusterWatcher" : false,
    "disabled" : false
  }, {
    "subagents" : [ ],
    "controllerId" : "testsuite",
    "agentId" : "agent_005",
    "agentName" : "apmaccsAgent",
    "url" : "http://192.11.3.3:4449",
    "state" : {
      "severity" : 2,
      "_text" : "UNKNOWN"
    },
    "healthState" : {
      "severity" : 2,
      "_text" : "NO_SUBAGENTS_ARE_COUPLED_AND_ENABLED"
    },
    "orders" : [ ],
    "runningTasks" : 0,
    "isClusterWatcher" : false,
    "disabled" : true
  }, {
    "subagents" : [ ],
    "controllerId" : "testsuite",
    "agentId" : "agent_006",
    "agentName" : "apmacwinAgent",
    "url" : "http://192.11.2.2:4245",
    "state" : {
      "severity" : 2,
      "_text" : "UNKNOWN"
    },
    "healthState" : {
      "severity" : 2,
      "_text" : "NO_SUBAGENTS_ARE_COUPLED_AND_ENABLED"
    },
    "orders" : [ ],
    "runningTasks" : 0,
    "isClusterWatcher" : false,
    "disabled" : true
  }, {
    "subagents" : [ ],
    "controllerId" : "testsuite",
    "agentId" : "agent_101",
    "agentName" : "agent17",
    "url" : "http://centostest_primary.sos:7775",
    "version" : "2.4.0-beta.20220714",
    "state" : {
      "severity" : 0,
      "_text" : "COUPLED"
    },
    "healthState" : {
      "severity" : 0,
      "_text" : "ALL_SUBAGENTS_ARE_COUPLED_AND_ENABLED"
    },
    "orders" : [ ],
    "runningTasks" : 0,
    "isClusterWatcher" : false,
    "disabled" : false
  }, {
    "subagents" : [ ],
    "controllerId" : "testsuite",
    "agentId" : "agent_009",
    "agentName" : "oracleAgent",
    "url" : "http://minos.sos:4445",
    "version" : "2.4.0-beta.20220714",
    "state" : {
      "severity" : 0,
      "_text" : "COUPLED"
    },
    "healthState" : {
      "severity" : 0,
      "_text" : "ALL_SUBAGENTS_ARE_COUPLED_AND_ENABLED"
    },
    "orders" : [ ],
    "runningTasks" : 0,
    "isClusterWatcher" : false,
    "disabled" : false
  }, {
    "subagents" : [ {
      "isDirector" : "PRIMARY_DIRECTOR",
      "agentId" : "agent_cluster_001",
      "subagentId" : "director_primary_001",
      "url" : "https://diragent-2-0-primary:4443",
      "version" : "2.5.0-SNAPSHOT",
      "state" : {
        "severity" : 0,
        "_text" : "COUPLED"
      },
      "orders" : [ ],
      "runningTasks" : 0,
      "isClusterWatcher" : false,
      "disabled" : false
    }, {
      "isDirector" : "NO_DIRECTOR",
      "agentId" : "agent_cluster_001",
      "subagentId" : "subagent_primary_001",
      "url" : "https://subagent-2-0-primary:4443",
      "version" : "2.5.0-SNAPSHOT",
      "state" : {
        "severity" : 0,
        "_text" : "COUPLED"
      },
      "orders" : [ ],
      "runningTasks" : 0,
      "isClusterWatcher" : false,
      "disabled" : false
    }, {
      "isDirector" : "NO_DIRECTOR",
      "agentId" : "agent_cluster_001",
      "subagentId" : "subagent_secondary_001",
      "url" : "https://subagent-2-0-secondary:4443",
      "version" : "2.5.0-SNAPSHOT",
      "state" : {
        "severity" : 0,
        "_text" : "COUPLED"
      },
      "orders" : [ ],
      "runningTasks" : 0,
      "isClusterWatcher" : false,
      "disabled" : false
    }, {
      "isDirector" : "NO_DIRECTOR",
      "agentId" : "agent_cluster_001",
      "subagentId" : "subagent_third_001",
      "url" : "https://subagent-2-0-third:4443",
      "version" : "2.5.0-SNAPSHOT",
      "state" : {
        "severity" : 0,
        "_text" : "COUPLED"
      },
      "orders" : [ ],
      "runningTasks" : 0,
      "isClusterWatcher" : false,
      "disabled" : false
    } ],
    "controllerId" : "testsuite",
    "agentId" : "agent_cluster_001",
    "agentName" : "AgentCluster001",
    "healthState" : {
      "severity" : 0,
      "_text" : "ALL_SUBAGENTS_ARE_COUPLED_AND_ENABLED"
    },
    "orders" : [ ],
    "runningTasks" : 0,
    "isClusterWatcher" : false,
    "disabled" : false
  }, {
    "subagents" : [ ],
    "controllerId" : "testsuite",
    "agentId" : "agent_014",
    "agentName" : "winutf8Agent",
    "url" : "http://192.11.0.146:4445",
    "version" : "2.4.0",
    "state" : {
      "severity" : 0,
      "_text" : "COUPLED"
    },
    "healthState" : {
      "severity" : 0,
      "_text" : "ALL_SUBAGENTS_ARE_COUPLED_AND_ENABLED"
    },
    "orders" : [ ],
    "runningTasks" : 0,
    "isClusterWatcher" : false,
    "disabled" : false
  } ],
  "orderSnapshot" : {
    "pending" : 0,
    "scheduled" : 1262,
    "inProgress" : 0,
    "running" : 1,
    "prompting" : 0,
    "suspended" : 0,
    "waiting" : 770,
    "blocked" : 0,
    "failed" : 0,
    "terminated" : 1
  },
  "orderSummary" : {
    "failed" : 0
  }
}


Health Status Checks

The MonitoringJob performs the following health status checks:

  • Controller
    • In volatileStatus the element connectionState includes severity with a value 0.
    • In volatileStatus the element componentState includes severity with a value 0.
    • If role is present in volatileStatus and does not have the value STANDALONE, then the element clusterNodeState has to have severity with a value 0.
    • If role is present in volatileStatus and does not have the value STANDALONE, then the element isCoupled has to have the value true.
  • Agents
    • In agentStatus the healthState is present and has severity with a value 0.
    • In agentStatus the state is present and has severity with a value 0.
    • For each enabled entry in subagents (disabled is false) the state has severity with a value 0.
  • JOC Cockpit
    • The connectionState has severity with a value 0.
    • The componentState has severity with a value 0.
    • If clusterNodeState is present it has severity with a value 0.
    • If controllerConnectionStates is present the state of each entry has severity with a value 0.

The number of failed checks is reported by the result return variable; see the next chapter.

Documentation

The Job Documentation including the full list of arguments can be found under: https://www.sos-berlin.com/doc/JS7-JITL/MonitoringJob.xml

...