- Created by Jean-luc KRIKER, last modified on Sept 17, 2020
You are viewing an old version of this page. View the current version.
Compare with Current View Page History
« Previous Version 2 Next »
blueprint: NorthStar (v5) and HealthBot (v2.0.2) | |
video | |
NorthStar (NS) login | https://<ip-address>:<port> admin // Juniper!1 |
HealthBot | SSH or https:// jcluser // Juniper!1 |
healthbot_listener.py | It's a webhook that listens for HB (HealthBot) messages. Usage: jcluser@ubuntu:~$ ls
HealthBot Listener script Expand source
# HealthBot webhook listener.
#
# Receives HealthBot alert notifications as JSON POSTs on "/" and, depending
# on the playbook that raised the alert, asks NorthStar (via user_functions)
# to simulate and then create a maintenance event for the affected node/link.

# We need to import request to access the details of the POST request
from flask import Flask, request
from flask_restful import abort
import commands
import json
import pprint
import requests
import os
import user_functions

requests.packages.urllib3.disable_warnings()

# Initialize the Flask application
app = Flask(__name__)


def _run_failure_simulation(rest_index_number, element_type):
    """Run the failure simulation for one topology element.

    A temporary maintenance event (use 'for_simulation') is created for the
    element, the simulation is run against it, and the temporary event is
    deleted again.

    :param rest_index_number: index of the node/link as returned by the
        user_functions lookup helpers.
    :param element_type: 'node' or 'link'.
    :return: the value returned by user_functions.check_if_simulation_pass()
        (the string 'true' or 'false').
    """
    rest_payload = user_functions.generate_maitenance_json(
        rest_index_number, 'for_simulation', element_type)
    maintenance_event = user_functions.create_maintenance(rest_payload)
    maintenance_index = maintenance_event.json()['maintenanceIndex']
    try:
        return user_functions.check_if_simulation_pass()
    finally:
        # Always remove the temporary event, even when the simulation call
        # fails, so a stale 'created_for_simulation' entry is not left behind.
        user_functions.delete_maintenance(maintenance_index)


def _handle_cpu_alert(device_id, message):
    """Handle an alert raised by the "cpu_openconfig" playbook for a node."""
    print("received cpu high alert")
    if "exceeds high threshold" in message:
        print('CPU HIGH UTIL DETECTED for ' + device_id)
        print('PERFORMING EXHUASTIVE LINK FAILURE SIMULATION for ' + device_id)
        # create maintenance for simulation purpose
        rest_index_number = user_functions.get_node_info(device_id)
        check_simulation = _run_failure_simulation(rest_index_number, 'node')
        print("simulation result " + check_simulation)
        print("delete temp maintenace")
        if check_simulation == 'true':
            print('CPU HIGH UTIL DETECTED PUT NODE UNDER MAINTENANCE::')
            # pprint.pprint(data)
            #print "rest_node_name, rest_index_number" + rest_node_name + rest_index_number
            rest_payload = user_functions.generate_maitenance_json(
                rest_index_number, 'for_maint', 'node')
            print(rest_payload)
            user_functions.create_maintenance(rest_payload)
        else:
            print('CANNOT PUT ' + device_id + ' UNDER MAINTENANCE. EXHUASTIVE FAILURE SIMULATION NOT PASSED')
    elif "is normal" in message:
        #print 'DATA_INACTIVE :: ', pprint.pprint(data)
        print('CPU util back to normal. ')


def _handle_delay_alert(device_id, message, data):
    """Handle an alert raised by the "delay" playbook for a link.

    The link is identified by data['keys']['source-address'].
    """
    print("received delay alert")
    source_address = data['keys']['source-address']
    #print "interface-ip " + source_address
    #target_address = data['keys']['target_address']
    #print "message" + message
    if "exceeds delay threshold" in message:
        print("HIGH DELAY DETECTED for " + device_id + " " + source_address)
        print("PERFORMING EXHUASTIVE LINK FAILURE SIMULATION for " + device_id + " " + source_address)
        # create maintenance for simulation purpose
        rest_index_number = user_functions.get_link_info_from_ip(source_address)
        check_simulation = _run_failure_simulation(rest_index_number, 'link')
        print("SIMULATION RESULT " + check_simulation)
        #print "delete temp maintenace"
        if check_simulation == "true":
            print("HIGH DELAY DETECTED PUT LINK UNDER MAINTENANCE::")
            # pprint.pprint(data)
            #print "rest_node_name, rest_index_number" + rest_node_name + rest_index_number
            rest_payload = user_functions.generate_maitenance_json(
                rest_index_number, 'for_maint', 'link')
            print(rest_payload)
            user_functions.create_maintenance(rest_payload)
        else:
            print("CANNOT PUT " + device_id + " " + source_address + " UNDER MAINTENANCE. EXHUASTIVE FAILURE SIMULATION NOT PASSED")
    elif "is normal" in message:
        #print 'DATA_INACTIVE :: ', pprint.pprint(data)
        print("DELAY back to normal. ")


@app.route('/', methods=['POST'])
def app_message_post():
    """Process one HealthBot webhook notification.

    The JSON body must provide 'message', 'device-id' and
    keys['_playbook_name']; the "delay" playbook additionally needs
    keys['source-address'].

    :return: the JSON string '{"result": "OK"}' on success.
    Aborts with HTTP 400 when the request is not JSON or when any exception
    is raised while processing it.
    """
    print("################# Start #######################")
    # Compare the media type only: request.mimetype drops parameters, so
    # "application/json; charset=utf-8" is accepted as well.
    if request.mimetype != 'application/json':
        abort(400, message="Expected Content-Type = application/json")
    try:
        data = request.json
        print(data)
        message = data['message']
        print("message " + message)
        playbook_name = data['keys']['_playbook_name']
        print("playbook_name " + playbook_name)
        #test_name = data['keys']['test-name']
        #print "interface-name " + test_name
        device_id = data['device-id']
        #print device_id + " " + message
        #if spec['eventRuleId'] == g_rule_id:
        #    state = status['state']
        #    device_id = status['entityId']
        #    if state == "active" and device_id == g_device_id:
        #        print 'DATA_ACTIVE :: ', pprint.pprint(data)
        #        user_functions.move_traffic()
        #        print 'traffic detoured and Slack was notified'
        #    elif state == "inactive":
        #        #print 'DATA_INACTIVE :: ', pprint.pprint(data)
        #        print 'LSP path can be changed back'
        #return json.dumps({'result': 'OK'})
        if playbook_name == "cpu_openconfig":
            _handle_cpu_alert(device_id, message)
        print('###############################')
        if playbook_name == "delay":
            _handle_delay_alert(device_id, message, data)
        print("###############################")
        """
        if event_rule_id == AppFormixInterfaceL3IncompleteEventID:
            print "Received interface l3 incomplete alert"
            if state == "active":
                rest_payload = user_functions.generate_link_maitenance_json()
                print rest_payload
                user_functions.create_maintenance(rest_payload)
                print 'Put problematic link into maintenance mode'
            elif state == "inactive":
                # print 'DATA_INACTIVE :: ', pprint.pprint(data)
                print 'link back to normal. you can complete the maintenance event'
        """
        return json.dumps({'result': 'OK'})
    except Exception as e:
        # Every processing failure is reported to the sender as a 400.
        abort(400, message="Exception processing request: {0}".format(e))


if __name__ == '__main__':
    app.run(
        host="0.0.0.0",
        port=int("10000")
    )
user_functions.py Expand source
"""Helpers used by healthbot_listener.py to talk to the NorthStar REST API.

Importing this module requests an OAuth2 token from NorthStar and stores the
resulting request headers in the module-level ``headers`` dict.
"""
import json
from pprint import pprint
import os
from jinja2 import Environment, FileSystemLoader
import datetime
import time
import requests

url = 'http://100.123.16.0:8091/Northstar/API/v2/tenant/1/topology/1/'
node_url_test = url + 'nodes'
node_url = url + 'nodes'
link_url = url + 'links'
lsp_url = url + 'te-lsps'
token_url = 'https://100.123.16.0:8443/oauth2/token'
maintenance_url = url + 'maintenances'
run_simulation_url = url + 'rpc/simulation'
hearders_token = {'Content-Type': 'application/json'}
# NOTE(review): credentials are hard-coded lab values; consider loading them
# from the environment or a config file.
user = 'admin'
password = 'Juniper!1'


def get_token():
    """Request an OAuth2 access token from NorthStar (password grant)."""
    credentials = {"grant_type": "password", "username": user, "password": password}
    r = requests.post(token_url, auth=(user, password), data=json.dumps(credentials),
                      headers=hearders_token, verify=False)
    return r.json()['access_token']


token = get_token()
headers = {'Authorization': str('Bearer ' + token), 'Content-Type': 'application/json'}


def _last_match(items, predicate, description):
    """Return the last element of *items* for which *predicate* is true.

    Raises LookupError naming *description* when nothing matches, instead of
    failing later on an unbound local variable.
    """
    matches = [item for item in items if predicate(item)]
    if not matches:
        raise LookupError('no match found for ' + description)
    return matches[-1]


def _template_env():
    """Build the Jinja2 environment used to render the REST payloads.

    Templates are looked up next to this module first; the current working
    directory is kept as a fallback because that is where the previous code
    (which quoted '__file__' by mistake) effectively searched.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    return Environment(loader=FileSystemLoader([module_dir, os.getcwd()]), trim_blocks=True)


def get_node_info(hostname):
    """Return the 'nodeIndex' of the node whose 'hostName' equals *hostname*."""
    node = _last_match(get_node().json(),
                       lambda i: i['hostName'] == hostname,
                       'node hostName %r' % (hostname,))
    return node['nodeIndex']


def get_link_info(linkname):
    """Return the 'linkIndex' of the link whose 'name' equals *linkname*."""
    link = _last_match(get_link().json(),
                       lambda i: i['name'] == linkname,
                       'link name %r' % (linkname,))
    return link['linkIndex']


def get_link_info_from_ip(interface_ip):
    """Return the 'linkIndex' of the link with *interface_ip* on either end."""
    link = _last_match(
        get_link().json(),
        lambda i: (i['endA']['ipv4Address']['address'] == interface_ip)
        or (i['endZ']['ipv4Address']['address'] == interface_ip),
        'link with interface address %r' % (interface_ip,))
    return link['linkIndex']


def get_link_from_nodeID_and_interface(nodeID, interface_name):
    """Return the link dict that has *interface_name* of node *nodeID* on either end."""
    def _end_matches(end):
        return end['node']['id'] == nodeID and end['interfaceName'] == interface_name

    return _last_match(get_link().json(),
                       lambda i: _end_matches(i['endA']) or _end_matches(i['endZ']),
                       'link on node %r interface %r' % (nodeID, interface_name))


def get_nodeID_from_hostname(hostname):
    """Return the 'id' of the node whose 'hostName' equals *hostname*."""
    node = _last_match(get_node().json(),
                       lambda i: i['hostName'] == hostname,
                       'node hostName %r' % (hostname,))
    return node['id']


'''
def move_traffic():
    contents = open('new_path.json', 'rb').read()
    print(contents)
    r = requests.post(lsp_url, data=contents, headers=headers, verify=False)
    # print(r)

def move_traffic2():
    contents = open('new_path.json', 'rb').read()
    print(contents)
    r = requests.put(lsp_url, data=contents, headers=headers, verify=False)
    # print(r)

def move_traffic_back():
    contents = open('original_path.json', 'rb').read()
    print(contents)
    r = requests.post(lsp_url, data=contents, headers=headers, verify=False)
'''


def get_node():
    """GET the node list; returns the requests response object."""
    return requests.get(node_url, headers=headers, verify=False)


def get_link():
    """GET the link list; returns the requests response object."""
    return requests.get(link_url, headers=headers, verify=False)


def create_maintenance(payload):
    """Print *payload* and POST it to the maintenances URL; returns the response."""
    print(payload)
    return requests.post(maintenance_url, data=payload, headers=headers, verify=False)


def delete_maintenance(maint_index):
    """DELETE the maintenance event with index *maint_index*; returns the response."""
    delete_maint_url = maintenance_url + '/' + str(maint_index)
    return requests.delete(delete_maint_url, headers=headers, verify=False)


def generate_maitenance_json(index_number, use, maintenance_type):
    """Render maintenance.j2 into the payload for a maintenance event.

    :param index_number: index of the node or link to put under maintenance.
    :param use: 'for_simulation' produces the temporary event named
        'created_for_simulation' starting 3600 minutes from now; any other
        value produces a 'Healthbot-<type>-health-alert<timestamp>' event
        starting 1 minute from now. Both end 6000 minutes from now.
    :param maintenance_type: 'node' or 'link', passed through to the template.
    :return: the rendered template text.
    """
    current_time = datetime.datetime.utcnow().strftime("%Y%m%d%H%M")
    if use == 'for_simulation':
        name = 'created_for_simulation'
        start = 3600
        end = 6000
    else:
        name = 'Healthbot-' + maintenance_type + '-health-alert' + current_time
        start = 1
        end = 6000
    return _template_env().get_template('maintenance.j2').render(
        maintenance_type=maintenance_type,
        index_number=index_number,
        current_time=current_time,
        name=name,
        start_time=getTimeSeqUTC(start),
        end_time=getTimeSeqUTC(end)
    )


'''
def generate_link_maitenance_json():
    index_number = get_link_info("L10.135.5.1_10.135.5.2")
    THIS_DIR = os.path.dirname(os.path.abspath(__file__))
    j2_env = Environment(loader=FileSystemLoader(THIS_DIR), trim_blocks=True)
    payload = j2_env.get_template('maintenance.j2').render(
        maintenance_type='link',
        index_number=index_number,
        current_time=datetime.datetime.utcnow().strftime("%Y%m%d%H%M"),
        start_time=getTimeSeqUTC(1),
        end_time=getTimeSeqUTC(6000)
    )
    return payload
'''


def generate_link_traffic_threshold_payload(linkIndex, linkID, endA_ID, endZ_ID, endA_Threshold, endZ_Threshold):
    """Render link_traffic_threshold.j2 with the given link identifiers and thresholds."""
    return _template_env().get_template('link_traffic_threshold.j2').render(
        linkID=linkID,
        linkIndex=linkIndex,
        endA_ID=endA_ID,
        endZ_ID=endZ_ID,
        endA_Threshold=endA_Threshold,
        endZ_Threshold=endZ_Threshold
    )


def update_link_traffic_threshold(payload, linkIndex):
    """PUT *payload* to the URL of link *linkIndex*; returns the response."""
    print(payload)
    linkThresholdURL = link_url + '/' + str(linkIndex)
    print(linkThresholdURL)
    return requests.put(linkThresholdURL, data=payload, headers=headers, verify=False)


def getTimeSeqUTC(num):
    """Return the UTC time *num* minutes from now as 'YYYYMMDDTHHMM00Z'.

    The offset is applied with timedelta directly on the UTC timestamp; the
    previous mktime()/localtime() round trip treated the UTC value as local
    time and could shift the result around DST changes.
    """
    moment = datetime.datetime.utcnow() + datetime.timedelta(minutes=int(num))
    return moment.strftime("%Y%m%dT%H%M") + "00" + 'Z'


def set_overload_bit(router_to_configure):
    """Commit 'set protocols isis overload' on *router_to_configure*.

    NOTE(review): Device and Config are not imported anywhere in the visible
    source, so this raises NameError as written; they look like the
    junos-eznc (PyEZ) classes - confirm and add the import.
    """
    print("making confugration changes on " + router_to_configure)
    print("#####################################")
    dev = Device(host=router_to_configure, user=user, password=password).open()
    with Config(dev) as cu:
        cu.load('set protocols isis overload', format='set')
        cu.pdiff()
        cu.commit()


def get_management_ip(host_name):
    """Return the 'ManagementIp' recorded for *host_name* in ./network_device.json."""
    network_info = './network_device.json'
    with open(network_info) as handle:
        data = json.load(handle)
    entry = _last_match(data['NetworkDeviceList'],
                        lambda i: i['NetworkDevice']['Name'] == host_name,
                        'network device %r' % (host_name,))
    return entry['NetworkDevice']['ManagementIp']


def _simulation_payload(simulation_name, simulation_type):
    """Build the JSON body that starts a simulation for a named maintenance event."""
    return json.dumps({
        "topoObjectType": "maintenance",
        "topologyIndex": 1,
        "elements": [
            {"type": "maintenance", "maintenanceName": simulation_name},
            simulation_type,
        ],
    })


def run_simulation(simulation_name):
    """POST a "link" simulation request for maintenance *simulation_name*; returns the response."""
    simulation_payload = _simulation_payload(simulation_name, "link")
    return requests.post(run_simulation_url, data=simulation_payload, headers=headers, verify=False)


def get_simulation_report(simulationID):
    """GET the L2_PeakSimRoute.r0 report of simulation *simulationID*; returns the response."""
    simulation_report_url = url + 'rpc/simulation/' + simulationID + '/Report/L2_PeakSimRoute.r0'
    return requests.get(simulation_report_url, headers=headers, verify=False)


def check_if_simulation_pass():
    """Simulate the 'created_for_simulation' maintenance event.

    :return: the string 'false' when the report contains "NotRouted",
        otherwise the string 'true'.
    """
    r = run_simulation('created_for_simulation')
    report = get_simulation_report(r.json()['simulationId'])
    if "NotRouted" in report.content:
        return 'false'
    return 'true'


def print_simulation_failure_content(report):
    """Print the marker lines and the leading fields of 'S' rows of a simulation report.

    Lines containing '#' or '*' are printed whole; other lines containing 'S'
    are printed as their first seven comma-separated fields (fewer when the
    row is shorter, instead of raising IndexError).
    """
    for line in report.content.split('\n'):
        if '#' in line or '*' in line:
            print(line)
        elif 'S' in line:
            print(','.join(line.split(',')[:7]))
add delay | Log into the CentOS ( jcluser / Juniper!1 ) [root@CentOS ~]# more add_delay.sh |
healthbot_listener.py output | HealthBot: monitors the RPM probes. HB >>> threshold crossed >>> changes the status on HB + sends a "put the link in maintenance" request to NorthStar
output Expand source
jcluser@ubuntu:~/self_healing$ python healthbot_listener.py /usr/local/lib/python2.7/dist-packages/urllib3/connectionpool.py:1004: InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings InsecureRequestWarning, * Serving Flask app "healthbot_listener" (lazy loading) * Environment: production WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. * Debug mode: off * Running on http://0.0.0.0:10000/ (Press CTRL+C to quit) ################# Start ####################### {u'group': u'group_all', u'severity': u'major', u'keys': {u'_instance_id': u'["delay"]', u'source-address': u'7.105.106.1', u'_playbook_name': u'delay'}, u'device-id': u'vMX-5', u'rule': u'probe_delay', u'topic': u'probe-delay', u'trigger': u'probe_exceed', u'message': u'7.105.106.1 ge-0/0/3.0 delay is 303099 exceeds delay threshold 200000 us. '} message 7.105.106.1 ge-0/0/3.0 delay is 303099 exceeds delay threshold 200000 us. 
playbook_name delay ############################### received delay alert HIGH DELAY DETECTED for vMX-5 7.105.106.1 PERFORMING EXHUASTIVE LINK FAILURE SIMULATION for vMX-5 7.105.106.1 { "topoObjectType": "maintenance", "topologyIndex": 1, "user": "admin", "name": "created_for_simulation", "startTime": "20200716T230300Z", "endTime": "20200718T150300Z", "elements": [ { "topoObjectType": "link", "index": 9 } ] } SIMULATION RESULT true HIGH DELAY DETECTED PUT LINK UNDER MAINTENANCE:: { "topoObjectType": "maintenance", "topologyIndex": 1, "user": "admin", "name": "Healthbot-link-health-alert202007141103", "startTime": "20200714T110400Z", "endTime": "20200718T150300Z", "elements": [ { "topoObjectType": "link", "index": 9 } ] } { "topoObjectType": "maintenance", "topologyIndex": 1, "user": "admin", "name": "Healthbot-link-health-alert202007141103", "startTime": "20200714T110400Z", "endTime": "20200718T150300Z", "elements": [ { "topoObjectType": "link", "index": 9 } ] } ############################### 172.22.0.3 - - [14/Jul/2020 04:03:29] "POST / HTTP/1.1" 200 - ################# Start ####################### {u'group': u'group_all', u'severity': u'major', u'keys': {u'_instance_id': u'["delay"]', u'source-address' : u'7.105.106.2', u'_playbook_name': u'delay'}, u'device-id': u'vMX-6', u'rule': u'probe_delay', u'topic': u'probe-delay', u'trigger': u'probe_exceed', u'message': u'7.105.106.2 ge-0/0/3.0 delay is 274028 exceed s delay threshold 200000 us. '} message 7.105.106.2 ge-0/0/3.0 delay is 274028 exceeds delay threshold 200000 us. 
playbook_name delay ############################### received delay alert HIGH DELAY DETECTED for vMX-6 7.105.106.2 PERFORMING EXHUASTIVE LINK FAILURE SIMULATION for vMX-6 7.105.106.2 { "topoObjectType": "maintenance", "topologyIndex": 1, "user": "admin", "name": "created_for_simulation", "startTime": "20200716T230400Z", "endTime": "20200718T150400Z", "elements": [ { "topoObjectType": "link", "index": 9 } ] } SIMULATION RESULT true HIGH DELAY DETECTED PUT LINK UNDER MAINTENANCE:: { "topoObjectType": "maintenance", "topologyIndex": 1, "user": "admin", "name": "Healthbot-link-health-alert202007141104", "startTime": "20200714T110500Z", "endTime": "20200718T150400Z", "elements": [ { "topoObjectType": "link", "index": 9 } ] } { "topoObjectType": "maintenance", "topologyIndex": 1, "user": "admin", "name": "Healthbot-link-health-alert202007141104", "startTime": "20200714T110500Z", "endTime": "20200718T150400Z", "elements": [ { "topoObjectType": "link", "index": 9 } ] } ############################### 172.22.0.3 - - [14/Jul/2020 04:04:29] "POST / HTTP/1.1" 200 - |
WebHook ( send to webserver ) on HB | 1- WebHook config under: Settings Menu >> Notification Settings
2- Add the WebHook Notification to the Device group under: Dashboard >> Select a Device group |
- No labels