Python统计数据库中的数据量【含MySQL、Oracle】

合集下载

1、下载文档前请自行甄别文档内容的完整性，平台不提供额外的编辑、内容补充、找答案等附加服务。
2、"仅部分预览"的文档,不可在线预览部分如存在完整性等问题,可反馈申请退款(可完整预览的文档不适用该条件!)。
3、如文档侵犯您的权益，请联系客服反馈,我们会尽快为您处理(人工客服工作时间：9:00-18:30)。

Python统计数据库中的数据量【含MySQL、Oracle】
Python程序⽂件如下：
# -*- coding: utf-8
# File : start.py
# Author : baoshan
import json
import pymysql
import cx_Oracle
import pandas as pd
def main():
dataSum = []
connInfo = "connInfo.json"# 配置⽂件名称
connFile = open(connInfo, 'r', encoding='utf8')
connRecords = connFile.read(102400) #⼀次读取多个字节
connRecordsjs = json.loads(connRecords)
for single in connRecordsjs:
if"mysql" == single.get("dbtype"):
conn = pymysql.connect(host=single.get("host"), port=single.get("port"), user=single.get("user"),
passwd=single.get("passwd"), charset=single.get("charset"))
if"gongxiangwangzhan" == single.get("source", "0"): # 共享⽹站公安局、民政局、聊城市发展和改⾰委员会定制
sql = "select table_schema as '数据库', " \
"table_name as '数据表', " \
"TABLE_COMMENT as '表注释', " \
"round(data_length/1024/1024,2) as '数据⼤⼩(M)', " \
"round(index_length/1024/1024,2) as '索引⼤⼩（M）', " \
"TABLE_ROWS as '⾏数' " \
"from information_schema.tables " \
"where TABLE_SCHEMA in ('"+single.get("dbschema")+"') " \
"AND TABLE_ROWS > 0 " \
"and table_name in "+single.get("selectkeystr")+""
else:
sql = "select " \
"table_schema as '数据库'," \
"table_name as '数据表', " \
"TABLE_COMMENT as '表注释', " \
"round(data_length/1024/1024,2) as '数据⼤⼩(M)', " \
"round(index_length/1024/1024,2) as '索引⼤⼩（M）', " \
"TABLE_ROWS as '⾏数'" \
"from information_schema.tables " \
"where TABLE_SCHEMA in ('"+single.get("dbschema")+"') " \
"and (table_name "+single.get("selectstr")+" '"+single.get("selectkeystr")+"') " \
"and TABLE_ROWS > 0"
df = pd.read_sql(sql, conn)
print(single.get("key"), str(df['⾏数'].sum()))
dataSum.append(df['⾏数'].sum())
conn.close()
elif"oracle" == single.get("dbtype"):
if"table" == single.get("selecttype"):
sql = "select owner as owner," \
"table_name as table_name," \
"tablespace_name as tablespace_name, " \
"num_rows as num_rows " \
"from all_tables " \
"where num_rows > 0 " \
"and table_name like '"+single.get("selectkeystr")+"' " \
"order by num_rows desc "
elif"database" == single.get("selecttype"): # 共享⽹站-oracle-⼯商局定制
sql = "select owner as owner, " \
"table_name as table_name, " \
"tablespace_name as tablespace_name, " \
"num_rows as num_rows " \
"from all_tables " \
"where num_rows > 0 " \
"and tablespace_name in('"+single.get("dbschema")+"') " \
"order by num_rows desc"
db = cx_Oracle.connect(single.get("connstr"), encoding='utf-8')
cursor = db.cursor()
cursor.execute(sql)
rs = cursor.fetchall()
df = pd.DataFrame(rs)
print(single.get("key"), str(df[3].sum()))
dataSum.append(df[3].sum())
cursor.close()
db.close()
elif"sqlserver" == single.get("dbtype"):
print(single.get("key"), '55568045')
dataSum.append(55568045)
# "SELECT ,B.ROWS FROM sysobjects A JOIN sysindexes B ON A.id = B.id WHERE A.xtype = 'U' AND B.indid IN(0,1) and b.rows >0 ORDER BY B.ROWS DESC"
else:
print("please give right database type.")
connFile.close()
print('-'*30)
print("数据量总计：", str(sum(dataSum)))
if__name__ == '__main__':
print("***⼀次性统计所有对接数据的委办局，和其对应的数据（条数）***")
main()
所需要的配置⽂件格式如下：
[
{
"key": "智慧公交",
"dbtype": "oracle",
"connstr": "nicai/123456@10.10.10.10:1521/ORCL",
"selecttype": "table",
"selectstr": "like",
"selectkeystr": "BUS%"
},
{
"key": "公共⾃⾏车",
"dbtype": "oracle",
"connstr": "nicai/123456@10.10.10.10:1521/ORCL",
"selecttype": "table",
"selectstr": "like",
"selectkeystr": "BICYCLE%"
},
{
"key": "安监局",
"dbtype": "mysql",
"host": "10.10.10.10",
"port": 3306,
"user": "nicai",
"passwd": "123456",
"charset": "utf8",
"selecttype": "table",
"selectstr": "like",
"dbschema": "statistics_data",
"selectkeystr": "ajj%"
},
{
"key": "百度交通",
"dbtype": "mysql",
"host": "10.10.10.2",
"port": 3306,
"user": "nicai",
"passwd": "123456",
"charset": "utf8",
"selecttype": "table",
"selectstr": "like",
"dbschema": "statistics_data",
"selectkeystr": "bdu%"
}
]
关于SqlServer的数据量查询，由于当时连不上，就没有嵌⼊到这个程序中。

不过查询的⽅法已经列出。

精进⾃⼰，分享他⼈！
谢谢。