首页 > 数据库 > Oracle > [20211130]为什么出现负数.txt
[20211130]为什么出现负数.txt
--//生产系统华为做的无用监控,出现一个奇怪的现象,做一个简单分析。
1.环境:
> @ ver1
PORT_STRING VERSION BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx 11.2.0.4.0 Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production
> @ dashtop sql_id "machine like 'H3C%'" trunc(sysdate)-1 trunc(sysdate)
Total
Seconds AAS %This SQL_ID FIRST_SEEN LAST_SEEN
--------- ------- ------- ------------- ------------------- -------------------
28580 .3 47% 5r14h528vkacs 2021-11-29 00:01:32 2021-11-29 23:58:30
18250 .2 30% 8ss7js42xzp05 2021-11-29 00:13:10 2021-11-29 23:59:51
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3650 .0 6% c3jafyjuwt13b 2021-11-29 00:06:15 2021-11-29 23:58:41
740 .0 1% f454ryjfx6syf 2021-11-29 00:04:01 2021-11-29 23:59:01
680 .0 1% ck5qb9zs2n34g 2021-11-29 00:03:13 2021-11-29 23:33:58
610 .0 1% 8sxz1p1238fyh 2021-11-29 00:03:51 2021-11-29 23:38:50
510 .0 1% cyfdvynj0mtc8 2021-11-29 00:08:06 2021-11-29 23:58:51
470 .0 1% 5ub6g7qwaf35x 2021-11-29 01:28:48 2021-11-29 23:33:47
370 .0 1% 9yfzqfdw2yhs4 2021-11-29 00:03:41 2021-11-29 23:38:40
350 .0 1% 5t9zzqmqdyxbg 2021-11-29 00:33:47 2021-11-29 23:53:49
330 .0 1% 27m1sf1nknfz2 2021-11-29 00:13:20 2021-11-29 23:39:00
320 .0 1% fpamfm2pkznu1 2021-11-29 00:08:16 2021-11-29 18:35:27
270 .0 0% 19nrxkxw2b8j1 2021-11-29 00:18:13 2021-11-29 23:33:43
260 .0 0% 21t4z1r0k4cyd 2021-11-29 01:06:13 2021-11-29 23:18:39
220 .0 0% 93jgxvdzsx4y1 2021-11-29 00:48:46 2021-11-29 23:51:56
200 .0 0% 18q3m92yk5zg5 2021-11-29 00:09:04 2021-11-29 23:53:59
200 .0 0% 8fm0xfacp0b0g 2021-11-29 05:48:51 2021-11-29 21:53:53
180 .0 0% 6uz4za48wf6j7 2021-11-29 02:18:08 2021-11-29 23:28:55
170 .0 0% 6sbq34x7ckff7 2021-11-29 01:13:50 2021-11-29 20:38:52
170 .0 0% gwt7tu3383grt 2021-11-29 02:23:59 2021-11-29 22:53:56
20 rows selected.
--//查看下划线条sql语句。
> @ sqlid 8ss7js42xzp05
SQL_ID HASH_VALUE SQLTEXT
------------- ------------- --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
8ss7js42xzp05 98554885 SELECT ROUND(100 *(1-A.MISSES / A.GETS), 2) latch_hit FROM( SELECT SNAP_ID, DBID, INSTANCE_NUMBER, MISSES - LAG(MISSES, 1, NULL) OVER(ORDER BY SNAP_ID) MISSES, GETS - LAG(GETS, 1, NULL) OVER(ORDER BY
SNAP_ID) GETS FROM ( SELECT SNAP_ID, DBID, INSTANCE_NUMBER, SUM(MISSES) MISSES, SUM(GETS) GETS FROM DBA_HIST_LATCH WHERE INSTANCE_NUMBER IN ( SELECT instance_number FROM v$INSTANCE) AND DBID IN ( SELE
CT DBID FROM v$database) GROUP BY SNAP_ID, DBID, INSTANCE_NUMBER) ORDER BY SNAP_ID DESC) A WHERE rownum = 1
--//格式化如下:
SELECT ROUND (100 * (1 - A.MISSES / A.GETS), 2) latch_hit
FROM ( SELECT SNAP_ID
,DBID
,INSTANCE_NUMBER
,MISSES - LAG (MISSES, 1, NULL) OVER (ORDER BY SNAP_ID) MISSES
,GETS - LAG (GETS, 1, NULL) OVER (ORDER BY SNAP_ID) GETS
FROM ( SELECT SNAP_ID
,DBID
,INSTANCE_NUMBER
,SUM (MISSES) MISSES
,SUM (GETS) GETS
FROM DBA_HIST_LATCH
WHERE INSTANCE_NUMBER IN (SELECT instance_number
FROM v$INSTANCE)
AND DBID IN (SELECT DBID
FROM v$database)
GROUP BY SNAP_ID, DBID, INSTANCE_NUMBER)
ORDER BY SNAP_ID DESC) A
WHERE ROWNUM = 1;
--//可以看出就是取awr最后两个差值。在一个小时内这样的查询latch_hit百分比怎么会有变化,可以肯定一般misses很少,结果应该接
--//近100%。实际上这些东西就是忽悠人的东西,根本毫无用处,还不如toad下database monitor简单实用。
--//查询结果如下,明显出现问题,怎么会出现大于100%的情况,出现溢出吗。
LATCH_HIT
-------------
106.72
select * from (
SELECT SNAP_ID
--,DBID
,INSTANCE_NUMBER
,GETS
,misses
,MISSES - LAG (MISSES, 1, NULL) OVER (ORDER BY SNAP_ID) MISSES1
,GETS - LAG (GETS, 1, NULL) OVER (ORDER BY SNAP_ID) GETS1
FROM ( SELECT SNAP_ID
,DBID
,INSTANCE_NUMBER
,SUM (MISSES) MISSES
,SUM (GETS) GETS
FROM DBA_HIST_LATCH
WHERE INSTANCE_NUMBER IN (SELECT instance_number
FROM v$INSTANCE)
AND DBID IN (SELECT DBID
FROM v$database)
GROUP BY SNAP_ID, DBID, INSTANCE_NUMBER)
ORDER BY SNAP_ID DESC) where rownum<=2;
SNAP_ID INSTANCE_NUMBER GETS MISSES MISSES1 GETS1
------------- --------------- ------------- ------------- ------------- -------------
62159 1 1325998236957 869554321 18008851 -267855682
62158 1 1326266092639 851545470 37590358 90100279
2 rows selected.
--//这样看gets并不是很大,为什么出现最大snap_id的gets比下一个snap_id的gets小的情况呢。仔细看看前面有一个sum汇总,看看一
--//些细节
SELECT SNAP_ID
-- ,DBID
,INSTANCE_NUMBER
, MISSES
, GETS
,LATCH_NAME
FROM DBA_HIST_LATCH
WHERE INSTANCE_NUMBER IN (SELECT instance_number FROM v$INSTANCE)
AND DBID IN (SELECT DBID FROM v$database)
AND SNAP_ID in ( 62158,62159)
and latch_name='cache buffers chains'
order by gets desc;
SNAP_ID INSTANCE_NUMBER MISSES GETS LATCH_NAME
------------- --------------- ------------- ------------- --------------------
62158 1 836511814 1249549542753 cache buffers chains
62159 1 854386428 1248737851161 cache buffers chains
--//1248737851161-1249549542753 = -811691592.出现了负数。为什么呢?
column HOST_NAME noprint
column INSTANCE_NAME noprint
column DB_NAME noprint
column DBID noprint
select * from (select * from DBA_HIST_DATABASE_INSTANCE where instance_number =1 order by 3 desc) where rownum<=3
/
INSTANCE_NUMBER STARTUP_TIME PAR VERSION LAST_ASH_SAMPLE_ID PLATFORM_NAME
--------------- ----------------------- --- -------------- ------------------ ----------------
1 2021-11-26 19:38:15.000 YES 11.2.0.4.0 222680974 Linux x86 64-bit
1 2021-10-31 05:41:30.000 YES 11.2.0.4.0 222356033 Linux x86 64-bit
1 2021-10-31 05:34:02.000 YES 11.2.0.4.0 0 Linux x86 64-bit
--//这个时间段很明显数据库并没有重启。
select * from
(SELECT SNAP_ID
-- ,DBID
,INSTANCE_NUMBER
, MISSES
, GETS
,LATCH_NAME
,GETS - LAG (GETS, 1, NULL) OVER (ORDER BY SNAP_ID) GETS1
FROM DBA_HIST_LATCH
WHERE INSTANCE_NUMBER IN (SELECT instance_number FROM v$INSTANCE)
AND DBID IN (SELECT DBID FROM v$database)
-- and SNAP_ID in ( 62158,62159)
and latch_name='cache buffers chains'
order by snap_id desc)
where gets1<0;
> set numw 15
> /
SNAP_ID INSTANCE_NUMBER MISSES GETS LATCH_NAME GETS1
--------------- --------------- --------------- --------------- -------------------- ---------------
62159 1 854386428 1248737851161 cache buffers chains -811691592
62158 1 836511814 1249549542753 cache buffers chains -421460196
62068 1 61996 964237686 cache buffers chains -3344960661046
62056 1 1474503455 3284864196923 cache buffers chains -6003979992
62047 1 1473983577 3276119437122 cache buffers chains -3578219873
62046 1 1473534857 3279697656995 cache buffers chains -1184524959
62037 1 1460525952 3250157788769 cache buffers chains -665290398
62035 1 1456437704 3243620822520 cache buffers chains -1577069275
61990 1 1386072147 3137679778941 cache buffers chains -1046511393
61975 1 1370929739 3102129814110 cache buffers chains -1173033933
61784 1 770631163 2131136455538 cache buffers chains -309450438
61764 1 744693650 2053424060395 cache buffers chains -1895267888
61588 1 366281461 974340934754 cache buffers chains -964787623
61427 1 48 771254 cache buffers chains -61944486703368
14 rows selected.
--//可以看出多次出现了负数。
> select STARTUP_TIME,BEGIN_INTERVAL_TIME,END_INTERVAL_TIME from DBA_HIST_SNAPSHOT where snap_id in (61427,62068) and instance_number=1;
STARTUP_TIME BEGIN_INTERVAL_TIME END_INTERVAL_TIME
----------------------- ------------------------ ------------------------
2021-11-26 19:38:15.000 2021-11-26 19:38:15.000 2021-11-26 20:00:47.136
2021-10-31 02:57:49.000 2021-10-31 02:57:49.000 2021-10-31 03:00:11.216
--//变化很大的snap_id跟数据库启动是相关的。其它出现负数说明oracle的设计有bug,不大可能出现溢出的说法。
--//另外华为的研发写sql太差劲了,我自己改写的版本:
--//dbid,instance_number 应该读写到变量,减少对控制文件的访问。
SELECT /*+ gather_plan_statistics */ ROUND (100 * (1 - A.MISSES / A.GETS), 2) latch_hit
FROM ( SELECT SNAP_ID
,DBID
,INSTANCE_NUMBER
,MISSES - LAG (MISSES, 1, NULL) OVER (ORDER BY SNAP_ID) MISSES
,GETS - LAG (GETS, 1, NULL) OVER (ORDER BY SNAP_ID) GETS
FROM ( SELECT SNAP_ID
,DBID
,INSTANCE_NUMBER
,SUM (MISSES) MISSES
,SUM (GETS) GETS
FROM DBA_HIST_LATCH
WHERE (SNAP_ID,INSTANCE_NUMBER,DBID) IN (SELECT SNAP_ID,INSTANCE_NUMBER,DBID
FROM ( SELECT snap_id,instance_number,DBID
FROM DBA_HIST_SNAPSHOT
WHERE INSTANCE_NUMBER = SYS_CONTEXT ('USERENV' ,'INSTANCE')
AND DBID = &dbid
ORDER BY 1 DESC)
WHERE ROWNUM <= 2)
GROUP BY SNAP_ID, DBID, INSTANCE_NUMBER)
ORDER BY SNAP_ID DESC) a
WHERE ROWNUM = 1;
LATCH_HIT
----------
99.19
1 row selected.
Elapsed: 00:00:00.01
> @ a1.txt
LATCH_HIT
----------
99.19
Elapsed: 00:00:00.87
--//比对方写的快不少。
--//在我看来这东西就是无聊骗人的东西,对于诊断一点帮助都没有,出问题这个值也是接近100%。
来自 “ ITPUB博客 ” ,链接:https://blog.itpub.net/267265/viewspace-2844951/,如需转载,请注明出处,否则将追究法律责任。