Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.


# We need to import request to access the details of the POST request
from flask import Flask, request
from flask_restful import abort
import commands
import json
import pprint
import requests
import os
import user_functions

requests.packages.urllib3.disable_warnings()

# Initialize the Flask application
app = Flask(__name__)

# Visual separator printed between the processing stages of one request.
SEPARATOR = '###############################'


def _simulation_passes(rest_index_number, element_type, result_label):
    """Run the failure simulation against a temporary maintenance event.

    A 'for_simulation' maintenance event is created for the element, the
    simulation verdict is read from user_functions.check_if_simulation_pass()
    and printed prefixed with ``result_label``, and the temporary event is
    deleted again.

    The deletion happens in a ``finally`` clause so that the temporary event
    is not left behind when reading or printing the verdict raises.

    :param rest_index_number: index handed to generate_maitenance_json().
    :param element_type: 'node' or 'link'.
    :param result_label: text printed in front of the simulation verdict.
    :return: True when the verdict equals the string 'true', else False.
    """
    rest_payload = user_functions.generate_maitenance_json(
        rest_index_number, 'for_simulation', element_type)
    maintenance_event = user_functions.create_maintenance(rest_payload)
    maintenance_index = maintenance_event.json()['maintenanceIndex']
    try:
        check_simulation = user_functions.check_if_simulation_pass()
        print(result_label + check_simulation)
    finally:
        user_functions.delete_maintenance(maintenance_index)
    return check_simulation == 'true'


def _put_under_maintenance(rest_index_number, element_type):
    """Create the real ('for_maint') maintenance event and print its payload."""
    rest_payload = user_functions.generate_maitenance_json(
        rest_index_number, 'for_maint', element_type)
    print(rest_payload)
    user_functions.create_maintenance(rest_payload)


def _handle_cpu_alert(message, device_id):
    """React to a 'cpu_openconfig' playbook notification for ``device_id``."""
    print("received cpu high alert")
    if "exceeds high threshold" in message:
        print('CPU HIGH UTIL DETECTED for ' + device_id)
        print('PERFORMING EXHUASTIVE LINK FAILURE SIMULATION for ' + device_id)
        rest_index_number = user_functions.get_node_info(device_id)
        simulation_ok = _simulation_passes(
            rest_index_number, 'node', "simulation result ")
        print("delete temp maintenace")
        if simulation_ok:
            print('CPU HIGH UTIL DETECTED PUT NODE UNDER MAINTENANCE::')
            _put_under_maintenance(rest_index_number, 'node')
        else:
            print('CANNOT PUT ' + device_id +
                  ' UNDER MAINTENANCE. EXHUASTIVE FAILURE SIMULATION NOT PASSED')
    elif "is normal" in message:
        print('CPU util back to normal. ')


def _handle_delay_alert(data, message, device_id):
    """React to a 'delay' playbook notification.

    Reads data['keys']['source-address'] to identify the affected link.
    """
    print("received delay alert")
    source_address = data['keys']['source-address']
    if "exceeds delay threshold" in message:
        print("HIGH DELAY DETECTED for " + device_id + " " + source_address)
        print("PERFORMING EXHUASTIVE LINK FAILURE SIMULATION for " +
              device_id + " " + source_address)
        rest_index_number = user_functions.get_link_info_from_ip(source_address)
        if _simulation_passes(rest_index_number, 'link', "SIMULATION RESULT "):
            print("HIGH DELAY DETECTED PUT LINK UNDER MAINTENANCE::")
            _put_under_maintenance(rest_index_number, 'link')
        else:
            print("CANNOT PUT " + device_id + " " + source_address +
                  " UNDER MAINTENANCE. EXHUASTIVE FAILURE SIMULATION NOT PASSED")
    elif "is normal" in message:
        print("DELAY back to normal. ")


@app.route('/', methods=['POST'])
def app_message_post():
    """Handle a webhook notification POSTed as JSON.

    The body must provide 'message', 'device-id' and
    'keys' -> '_playbook_name'; the 'delay' playbook additionally needs
    'keys' -> 'source-address'.

    :return: the JSON string '{"result": "OK"}' on success.
    :raises: HTTP 400 (via abort) when the media type is not
        application/json or when any exception occurs while processing.
    """
    print("################# Start #######################")
    # Compare the media type only, so that a header carrying parameters
    # (e.g. "application/json; charset=utf-8") is still accepted.
    if request.mimetype != 'application/json':
        abort(400, message="Expected Content-Type = application/json")
    try:
        data = request.json
        print(data)
        message = data['message']
        print("message " + message)
        playbook_name = data['keys']['_playbook_name']
        print("playbook_name " + playbook_name)
        device_id = data['device-id']

        if playbook_name == "cpu_openconfig":
            _handle_cpu_alert(message, device_id)
        print(SEPARATOR)

        if playbook_name == "delay":
            _handle_delay_alert(data, message, device_id)
        print(SEPARATOR)

        return json.dumps({'result': 'OK'})
    except Exception as e:
        # abort() raises, so nothing placed after it would ever run.
        abort(400, message="Exception processing request: {0}".format(e))


if __name__ == '__main__':
    app.run(
        host="0.0.0.0",
        port=10000
    )


jcluser@ubuntu:~/self_healing$ python healthbot_listener.py /usr/local/lib/python2.7/dist-packages/urllib3/connectionpool.py:1004: InsecureRequestWarning: Unverified HTTPS request is being ma de. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warn ings InsecureRequestWarning, * Serving Flask app "healthbot_listener" (lazy loading) * Environment: production WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. * Debug mode: off * Running on http://0.0.0.0:10000/ (Press CTRL+C to quit) ################# Start ####################### {u'group': u'group_all', u'severity': u'major', u'keys': {u'_instance_id': u'["delay"]', u'source-address': u'7.105.106.1', u'_play book_name': u'delay'}, u'device-id': u'vMX-5', u'rule': u'probe_delay', u'topic': u'probe-delay', u'trigger': u'probe_exceed', u'me ssage': u'7.105.106.1 ge-0/0/3.0 delay is 303099 exceeds delay threshold 200000 us. '} message 7.105.106.1 ge-0/0/3.0 delay is 303099 exceeds delay threshold 200000 us. 
playbook_name delay ############################### received delay alert HIGH DELAY DETECTED for vMX-5 7.105.106.1 PERFORMING EXHUASTIVE LINK FAILURE SIMULATION for vMX-5 7.105.106.1 { "topoObjectType": "maintenance", "topologyIndex": 1, "user": "admin", "name": "created_for_simulation", "startTime": "20200716T230300Z", "endTime": "20200718T150300Z", "elements": [ { "topoObjectType": "link", "index": 9 } ] } SIMULATION RESULT true HIGH DELAY DETECTED PUT LINK UNDER MAINTENANCE:: { "topoObjectType": "maintenance", "topologyIndex": 1, "user": "admin", "name": "Healthbot-link-health-alert202007141103", "startTime": "20200714T110400Z", "endTime": "20200718T150300Z", "elements": [ { "topoObjectType": "link", "index": 9 } ] } { "topoObjectType": "maintenance", "topologyIndex": 1, "user": "admin", "name": "Healthbot-link-health-alert202007141103", "startTime": "20200714T110400Z", "endTime": "20200718T150300Z", "elements": [ { "topoObjectType": "link", "index": 9 } ] } ############################### 172.22.0.3 - - [14/Jul/2020 04:03:29] "POST / HTTP/1.1" 200 - ################# Start ####################### {u'group': u'group_all', u'severity': u'major', u'keys': {u'_instance_id': u'["delay"]', u'source-address' : u'7.105.106.2', u'_playbook_name': u'delay'}, u'device-id': u'vMX-6', u'rule': u'probe_delay', u'topic': u'probe-delay', u'trigger': u'probe_exceed', u'message': u'7.105.106.2 ge-0/0/3.0 delay is 274028 exceed s delay threshold 200000 us. '} message 7.105.106.2 ge-0/0/3.0 delay is 274028 exceeds delay threshold 200000 us. playbook_name delay ############################### received delay alert HIGH DELAY DETECTED for vMX-6


blueprint:

NorthStar (v5) and HealthBot (v2.0.2)

video
Log into the GUI
NorthStar (NS) login

https://<ip@>:<port>         ( admin // Juniper!11 )

HealthBot

SSH or https://jcluser                       ( jcluser // Juniper!11    )

Add Allowed network prefix: Commands menu >> Add Allowed network prefix >> <enter IP address of client>
CLI on Healthbot

Start healthbot_listener.py

( Webhook + trigger NS to put the link in maintenance)

Log into ssh jcluser@healthbot     ( jcluser // Juniper!1  )

It is a webhook listener: it listens for HealthBot (HB) messages.


Usage:

jcluser@ubuntu:~$ ls
healthbot-2.0.2-1.deb self_healing


jcluser@ubuntu:~$ cd self_healing/
jcluser@ubuntu:~/self_healing$ ls
healthbot_listener.py maintenance.j2 README.md RPMprobe.yml user_functions.py user_functions.pyc

jcluser@ubuntu:~/self_healing$ python healthbot_listener.py
/usr/local/lib/python2.7/dist-packages/urllib3/connectionpool.py:1004: InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
InsecureRequestWarning,
* Serving Flask app "healthbot_listener" (lazy loading)
* Environment: production
WARNING: This is a development server. Do not use it in a production deployment.
Use a production WSGI server instead.
* Debug mode: off
* Running on http://0.0.0.0:10000/ (Press CTRL+C to quit)

Code Block
titleHealthBot Listener script
collapsetrue
Create two Tunnels: PCEP and NETCONF

Tunnel 1 = jlk_4_to_1_PCEP  ( Control type: PCEInitiated )     Path Computation Element Protocol

Tunnel 2 = jlk_4_to_1_Netconf  ( Control type: Device Controlled )

and

Application Menu >> Path Optimization >>  path optimization >> Enable + 1 minute


Image Added


Image Added


Set Path Optimization to run every minute


Image Added


Image Added

Display the Delay Tab


Image Added


Image Added


Connect to the CentOS host (to create delay)
add delay 

Log into the CentOS  ( jcluser / Juniper!1 )

su -   ( Juniper!1 )

./add_delay.sh


[root@CentOS ~]# more add_delay.sh
tc qdisc add dev eth1 root netem delay 300ms
echo "300 ms delay added between vMX-5 and vMX-6"
[root@CentOS ~]#



SSH to HealthBot
healthbot_listener.py output

HealthBot: monitor the RPM

HB >>> threshold crossed >>> change the status on HB + send a "put the link in maintenance" request to NorthStar

Code Block
titleoutput
collapsetrue

Monitor Output 


received delay alert
HIGH DELAY DETECTED for vMX-6 7.105.106.2


PERFORMING EXHUASTIVE LINK FAILURE SIMULATION for vMX-6 7.105.106.2

{ "topoObjectType": "maintenance", "topologyIndex": 1, "user": "admin", "name": "created_for_simulation", "startTime": "20200716T230400Z", "endTime": "20200718T150400Z", "elements": [ { "topoObjectType": "link", "index": 9 } ] } SIMULATION RESULT true HIGH DELAY DETECTED PUT LINK UNDER MAINTENANCE:: { "topoObjectType": "maintenance", "topologyIndex": 1, "user": "admin", "name": "Healthbot-link-health-alert202007141104", "startTime": "20200714T110500Z", "endTime": "20200718T150400Z", "elements": [ { "topoObjectType": "link", "index": 9 } ] } { "topoObjectType": "maintenance", "topologyIndex": 1, "user": "admin", "name": "Healthbot-link-health-alert202007141104", "startTime": "20200714T110500Z", "endTime": "20200718T150400Z", "elements": [ { "topoObjectType": "link", "index": 9 } ] } ############################### 172.22.0.3 - - [14/Jul/2020 04:04:29] "POST / HTTP/1.1" 200 -
WebHook ( send to webserver ) on HB

1- WebHook config under: Settings Menu >> Notification Settings

2- Add the WebHook Notification to the Device group under:  Dashboard >> Select a Device group 

Image Removed

Image Removed 

2- Add the WebHook Notification to the Device group under:  Dashboard >> Select a Device group

Image Removed


SIMULATION RESULT true
HIGH DELAY DETECTED PUT LINK UNDER MAINTENANCE::


On NorthStar


1- Link goes into maintenance mode

2- Tunnel is re-routed to avoid the link

Image Added

Go to the maintenance tab

Image Added


Image Added