背景
在之前搭建了databus 用于oracle数据同步到mysql,同时也用了otter+canal 用于mysql数据同步到oracle,之前也写过监控databus和验证数据的脚本,但也只是验证,真正邮件收到不一致的数据后需要来做修复。所以验证数据的脚本产生了。
思路
产生不一致的数据无非是mysql端或oracle端或多或少数据,我们只按照id来进行比对就好(每5000个id对比一次),通过set去重,最终结果输出到文本中。
代码
读取配制文件中的表名,将查到的结果存入set,用mysql oracle两个结果集的差值得到其或多或少的id。
import pymysql
import cx_Oracle
def readTableName(filename,ouser,opword,odb,muser,mpword,hostname,mdb):
with open(filename,'r') as f:
for fcon in f.readlines():
fc = fcon.strip().strip('\n')
# escape startswith # line
if fc.startswith('#'):
continue
elif fc == "" or fc is None:
continue
else:
#execute sql
try:
#pdb.set_trace()
info = 'table_name:{:30} '.format(fc)
writeFile(info)
resstr='mysql has {} and oracle has {} '
#table_name add prifix
owner = 'tc'
# start with t owner = tc else kc
if not fc.startswith('t'):
owner='kc'
selsql = 'select min(id),max(id) from {}'.format(fc)
#
rescnt = executeSqlStatistic(hostname,muser,mpword,mdb,selsql)
v_begin = rescnt[0][0]
#print(v_begin)
v_end = rescnt[0][1]
#print(v_end)
v_step = v_begin+1000 #get 5000 id per times
mysqlset = {}
oracleset = {}
msetres = {"",} #result of mysql set
osetres = {"",} #result of oralce set
selidsql = 'select id from {} where id>={} and id< {}'
selidora = 'select id from {}.{} where id>={} and id< {}'
while v_begin <= v_end:
idsql = selidsql.format(fc,v_begin,v_step)
resmid = executeSqlStatistic(hostname,muser,mpword,mdb,idsql)
#print(resmid)
mysqlset = set(resmid) #get mysql id set
oidsql = selidora.format(owner,fc,v_begin,v_step)
resoid = executeStatistic(ouser,opword,odb,oidsql)
oracleset = set(resoid)
#write mysql and oralce id cnt
if len(mysqlset) != len(oracleset):
writeFile("mset min id {} max id {} cnt is {} and oset cnt is {} \n".format(v_begin,v_step,len(mysqlset),len(oracleset)))
#get m - o set and o - m set and clear set add v_begin
msetres.update(mysqlset - oracleset)
oracleset.update(oracleset - mysqlset)
v_begin += 1000
v_step += 1000
#print("resmset {}".format(msetres),"\n")
#print("resoset {}".format(osetres),"\n")
mysqlset = {}
oracleset = {}
#print(reso)
orares = resstr.format(msetres,osetres)
writeFile(orares)
writeFile('\n')
except Exception as e:
print(e)
连接mysql和oracle读取数据方法
def executeStatistic(user,pword,instance,sql):
conn = cx_Oracle.connect(user,pword,instance)
cursor = conn.cursor()
#owner = 'tczx'
# start with t owner = tczx else kcrm
#if not table_name.startswith('t'):
# owner='kcrm'
#selsql = 'select max(id),count(1) from {}.{}'.format(owner,table_name)
cursor.execute(sql)
res =[]
try:
res = cursor.fetchall()
except cx_Oracle.Error as err:
print(err)
finally:
cursor.close()
conn.close()
#print(res)
return res
#execute from mysql
def executeSqlStatistic(hostname,uname,pword,db_name,sql):
conn = pymysql.connect(host=hostname, port=3306, user=uname, password=pword)
cursor = conn.cursor()
changedb = 'use `{}`'.format(db_name)
cursor.execute( changedb )
#selsql = 'select max(id),count(1) from {}'.format(table_name)
cursor.execute(sql)
res=[]
try:
res = cursor.fetchall()
except pymysql.Error as err:
print(err)
finally:
cursor.close()
conn.close()
return res
记录日志
#write statistic to file
def writeFile(info,filename='consistency_log'):
with open(filename,'a+') as f:
f.write(info)
主方法,读取配制文件最好使用绝对路径,这样即使把脚本放到定时任务中也不会出问题
if __name__ == '__main__':
with open('statistic_log','w') as f:
f.write('')
hostname = ''
mdb = 'dbname'
muser = 'user'
mpword = 'pwd'
#readTableName(filename,ouser,opword,odb,muser,mpword,hostname,mdb):
odb = 'ip:port/orcl'
ouser = 'user'
opword = 'pwd'
readTableName('check_table.txt',ouser,opword,odb,muser,mpword,hostname,mdb)
本文介绍了如何验证Oracle数据库与MySQL数据库在数据同步后的一致性。通过比较两个数据库中相同ID的数量,利用集合操作找出不一致的记录,并将结果输出到文本文件中。文章提供了实现此功能的思路及代码示例。


被折叠的 条评论
为什么被折叠?



