Friday, 13 March 2020

MongoDB replication health check

node.json

{ 
"adh_dev": [{"node":"amdbdabc0101:2700"}, {"node":"amdbdabc0102:2700"}]
}







''' 

Script Purpose : Mongo Database Cluster Health Check and Failover
Team : My team
Version : V1.0.0
'''
import pymongo
import os
import sys
import time
import json
from pymongo.errors import AutoReconnect

# Set HTTPS_PROXY in this process's environment before anything else runs.
os.environ['HTTPS_PROXY']='http://proxy.abc.com:8099'

# Run send_sns.py through the shell as soon as the script starts.
# NOTE(review): the exit status returned by os.system() is discarded, so a
# failure of send_sns.py goes unnoticed here.
os.system('python send_sns.py')

#_sendSns()
#programPause = input("Press the <ENTER> key to region failover.....")

def _screen_clr():
    '''
    Clear the terminal using ANSI/VT100 escape sequences.

    Emits "Erase in Display" for the whole screen (ESC [ 2 J) followed by
    a full terminal reset (ESC c). Each sequence is written with print(),
    so each is followed by a newline.
    '''
    # The final byte of Erase-in-Display is an upper-case 'J'; a lower-case
    # 'j' is a different control function and does not erase the display.
    print(chr(27) + '[2J')
    # '\033c' and '\x1bc' are the same two bytes (ESC c), so a single reset
    # is enough.
    print('\033c')

# Clear the terminal before any status output is printed.
_screen_clr()

def _draw_line():
    '''Print one row of dots, used as a visual separator between report sections.'''
    separator = "............................................................................................................................."
    print(separator)

def _red_text():
    '''
    Switch the terminal to bold red text on a black background.

    Writes the ANSI SGR sequence 1;31;40 followed by a space and a
    newline; print() then adds its own trailing newline.
    '''
    bold_red_on_black = "\033[1;31;40m \n"
    print(bold_red_on_black)
   
def _normal_text():
    '''
    Switch the terminal to bold white text on a black background.

    Writes the ANSI SGR sequence 1;37;40 followed by a space and a
    newline; print() then adds its own trailing newline.
    '''
    bold_white_on_black = "\033[1;37;40m \n"
    print(bold_white_on_black)
#_normal_text()

# Module-level state shared by the functions below.
# NOTE(review): _find_primary_secondary() declares only Primary_node as
# global; it assigns primary_optime / secondary_optime as locals, so these
# two module-level values are never updated by it.
primary_optime = 0
secondary_optime = 0
Primary_node=""


'''
Loading Data Node and Arbiter node into list from json input file
'''

# Filled by _load_cluster_nodes() from the 'east_data', 'west_data',
# 'east_arbiter' and 'west_arbiter' sections of the JSON input file.
east_data_nodes=[]
west_data_nodes=[]
east_arbiter_nodes=[]
west_arbiter_nodes=[]
# Set by _validate_user_input() to east_data_nodes + west_data_nodes.
data_nodes=[]

def _load_cluster_nodes(path='dms_qa_nodes.json'):
    '''
    Load the cluster node names from a JSON file into the module-level lists.

    The file must contain the keys 'east_data', 'west_data', 'east_arbiter'
    and 'west_arbiter', each mapping to a list of objects with a 'node'
    entry. The 'node' values are stored, in file order, in
    east_data_nodes, west_data_nodes, east_arbiter_nodes and
    west_arbiter_nodes respectively.

    path : JSON file to read; defaults to 'dms_qa_nodes.json' in the
           current working directory.

    Raises OSError if the file cannot be opened, json.JSONDecodeError if it
    is not valid JSON, and KeyError if a section or 'node' entry is missing.
    '''
    global east_data_nodes,west_data_nodes,east_arbiter_nodes,west_arbiter_nodes
    with open(path, 'r') as f:
        nodes_dict = json.load(f)

    def _names(section):
        # Pull the 'node' value out of every entry of one section.
        return [entry['node'] for entry in nodes_dict[section]]

    # Rebind rather than append so that calling this function again
    # replaces the lists instead of duplicating every node.
    east_data_nodes = _names('east_data')
    west_data_nodes = _names('west_data')
    east_arbiter_nodes = _names('east_arbiter')
    west_arbiter_nodes = _names('west_arbiter')
# Populate the east/west node lists now; the validation step below reads them.
_load_cluster_nodes()


def _validate_user_input():
    '''
    Check that the node named on the command line is a known data node.

    Rebuilds the module-level data_nodes list as east_data_nodes +
    west_data_nodes, then compares sys.argv[1] with the part of each node
    name before its first '.'.

    Exits the script (SystemExit) when no node name was supplied or when
    the supplied name matches none of the data nodes, including the case
    where the data node list is empty.
    '''
    global data_nodes,east_data_nodes,west_data_nodes
    data_nodes = east_data_nodes + west_data_nodes

    # Without this guard a missing argument surfaces as a bare IndexError.
    if len(sys.argv) < 2:
        sys.exit("Usage: " + str(sys.argv[0] if sys.argv else "script") + " <node_name>")

    requested = str(sys.argv[1])
    # any() over an empty list is False, so an empty node list is rejected
    # instead of silently accepting whatever was typed.
    if not any(node.split('.', 1)[0] == requested for node in data_nodes):
        sys.exit("Input node name  is not valid, existing")
            
# Stop early if the node given on the command line is not a known data node.
_validate_user_input()


''' Mongo user credentails '''

# NOTE(review): credential material is hard-coded in the source. The
# connection string below uses the part of `x` after its first '4' as the
# password; with the value shown here there is no '4', so
# x.split('4',1)[1] raises IndexError -- presumably the real value contains
# one; confirm before running.
x='abcd'


'''
Mongo DB Connection and getting server status
'''


# Connect to the node named by the first command-line argument on port
# 27101, with 5-second connect and socket timeouts.
conn = pymongo.MongoClient('mongodb://qmdbdabc_cluster_admin:'+x.split('4',1)[1]+'@'+str(sys.argv[1])+':27101',connectTimeoutMS=5000, socketTimeoutMS=5000)
# The commands below are issued against the 'admin' database.
db = conn['admin']
# Replica-set status document; _find_primary_secondary() reads its
# 'members' list.
db_stats = db.command({'replSetGetStatus'  :1})
# 'connections' section of serverStatus; its 'current' and 'available'
# values are printed by _find_primary_secondary().
db_connections=db.command("serverStatus")["connections"]
current_connections=db_connections['current']
free_connections=db_connections['available']




def _find_primary_secondary(max_lag_secs=30):
    '''
    Listing Primary & Secondary nodes and Replication Lag status

    Reads the module-level db_stats['members'] list and, for each member,
    uses its 'name', 'stateStr' and 'optimeDate' fields:

    * prints the PRIMARY and ARBITER members and stores the primary's name
      in the module-level Primary_node;
    * prints every SECONDARY member with its replication lag, computed as
      primary optimeDate minus secondary optimeDate, in seconds;
    * finally prints the module-level current_connections and
      free_connections counts.

    max_lag_secs : largest acceptable replication lag in seconds
                   (default 30).

    Exits the script (SystemExit) when a member is in a state other than
    PRIMARY / SECONDARY / ARBITER, when no PRIMARY member is present, or
    when a secondary's lag exceeds max_lag_secs.
    '''
    global Primary_node
    members = db_stats['members']
    healthy_states = ('PRIMARY', 'SECONDARY', 'ARBITER')
    primary_optime = None

    for member in members:
        state = member['stateStr']
        if state not in healthy_states:
            sys.exit("Cluster node "+member['name']+" is having heath issue, please fix it before processing further")
        if state == 'PRIMARY':
            primary_optime = member['optimeDate']
            print("Primary node     : "+member['name'])
            Primary_node = member['name']
        elif state == 'ARBITER':
            print("Arbiter node     : "+member['name'])

    # Lag is measured against the primary; without one the subtraction
    # below would fail on an unbound name, so stop with a clear message.
    if primary_optime is None:
        sys.exit("No PRIMARY member found in the replica set, cannot check replication lag")

    _draw_line()
    for member in members:
        if member['stateStr'] != 'SECONDARY':
            continue
        seconds_lag = (primary_optime - member['optimeDate']).total_seconds()
        print("Secondary node   : "+member['name']+"  Replication Lag   : "+str(seconds_lag)+"Secs")
        # Checked inside the SECONDARY branch so the reported node is always
        # the one whose lag was just measured.
        if seconds_lag > max_lag_secs:
            sys.exit("Cluster node "+member['name']+" replication lag is "+str(seconds_lag)+" Secs, please wait before processing further")
    _draw_line()
    print("All secondary nodes replication status is looking good")
    print("DB current connections count    : "+str(current_connections))
    print("DB available  connections count : "+str(free_connections))
    
# Print the replica-set role and lag report; this exits the script when a
# member is unhealthy or a secondary lags too far behind.
_find_primary_secondary()
_draw_line()

def _sendSns():
    '''
    Publish a fixed "Starting cluster failover" notice to the team SNS topic.

    Creates an SNS client for region us-east-1 and publishes one message
    with the subject 'Cluster failover'. A ClientError raised by the
    publish call is reported on stdout (topic and error code) and is not
    re-raised.
    '''
    # Imported here because the module never imports them at the top;
    # without these lines every call fails with NameError. Keeping the
    # import local also means boto3 is only required when a notification
    # is actually sent.
    import boto3
    from botocore.exceptions import ClientError

    msglist = "Starting cluster failover"
    topic = 'arn:aws:sns:us-east-1:abcd:teamnotifiy'
    sns = boto3.client('sns', region_name='us-east-1')
    subject = 'Cluster failover'

    try:
        sns.publish(TopicArn=topic, Message=msglist, Subject=subject)
    except ClientError as e:
        print("error publising to "+ topic)
        print(e.response['Error']['Code'])

No comments:

Post a Comment