星期三, 1月 06, 2016

Oracle 12c RAC VOTING disk破壞測試(normal redundancy)

我的測試環境:
GRID 12.1.0.2
DB 12.1.0.2
使用兩台主機, 分別跑instance1, instance2

#VOTING disk破壞測試(後來發現我的LAB, VOTING disk不是用normal redundancy)
dd if=/dev/zero of=/dev/sdi1 bs=1M count=20  #disk8
dd if=/dev/zero of=/dev/sdj1 bs=1M count=20  #disk9

/u01/app/12.1.0/grid/bin/crsctl stop crs -f
/u01/app/12.1.0/grid/bin/crsctl start crs -excl


SQL> select name , state from v$asm_diskgroup;

NAME                           STATE
------------------------------ -----------
VOTING                         DISMOUNTED
DATA                           MOUNTED

SQL>
col path for a25
select path, MOUNT_STATUS, HEADER_STATUS, MODE_STATUS, STATE from v$asm_disk;

SQL>
PATH                      MOUNT_S HEADER_STATU MODE_ST STATE
------------------------- ------- ------------ ------- --------
ORCL:DISK8                CLOSED  PROVISIONED  ONLINE  NORMAL
ORCL:DISK7                CLOSED  MEMBER       ONLINE  NORMAL
ORCL:DISK9                CLOSED  PROVISIONED  ONLINE  NORMAL
ORCL:DISK1                CACHED  MEMBER       ONLINE  NORMAL
ORCL:DISK2                CACHED  MEMBER       ONLINE  NORMAL
ORCL:DISK3                CACHED  MEMBER       ONLINE  NORMAL
ORCL:DISK4                CACHED  MEMBER       ONLINE  NORMAL
ORCL:DISK5                CACHED  MEMBER       ONLINE  NORMAL
ORCL:DISK6                CACHED  MEMBER       ONLINE  NORMAL

V$ASM_DISK不應當顯示PROVISIONED
我們發現當checksum不正確時可能導致V$ASM_DISK.HEADER_STATUS顯示為PROVISIONED

dd if=/dev/zero of=/dev/sdh1 bs=1M count=20

/usr/sbin/oracleasm createdisk disk10 /dev/sdi1
/usr/sbin/oracleasm createdisk disk11 /dev/sdj1
/usr/sbin/oracleasm createdisk disk12 /dev/sdh1 (ORCL:DISK7, 我以為建立名字不重複的disk name就可以)

[root@db1 ~]# ps -ef |grep ASM |grep pmon
grid     21216     1  0 14:05 ?        00:00:00 asm_pmon_+ASM1
[root@db1 ~]#


su - grid
export ORACLE_HOME=/u01/app/12.1.0/grid
sqlplus / as sysasm
SQL>
col path for a25
select path, MOUNT_STATUS, HEADER_STATUS, MODE_STATUS, STATE from v$asm_disk;

PATH                      MOUNT_S HEADER_STATU MODE_ST STATE
------------------------- ------- ------------ ------- --------
ORCL:DISK11               CLOSED  PROVISIONED  ONLINE  NORMAL  ->PROVISIONED代表header資訊都已經被清空
ORCL:DISK12               CLOSED  PROVISIONED  ONLINE  NORMAL  ->PROVISIONED代表header資訊都已經被清空
ORCL:DISK10               CLOSED  PROVISIONED  ONLINE  NORMAL  ->PROVISIONED代表header資訊都已經被清空
ORCL:DISK1                CACHED  MEMBER       ONLINE  NORMAL
ORCL:DISK2                CACHED  MEMBER       ONLINE  NORMAL
ORCL:DISK3                CACHED  MEMBER       ONLINE  NORMAL
ORCL:DISK4                CACHED  MEMBER       ONLINE  NORMAL
ORCL:DISK5                CACHED  MEMBER       ONLINE  NORMAL
ORCL:DISK6                CACHED  MEMBER       ONLINE  NORMAL

9 rows selected.

SQL>
col path for a25
col failgroup  for a10
select path, MOUNT_STATUS, HEADER_STATUS, MODE_STATUS, STATE, failgroup from v$asm_disk;

PATH                      MOUNT_S HEADER_STATU MODE_ST STATE
------------------------- ------- ------------ ------- --------
ORCL:DISK11               CLOSED  PROVISIONED  ONLINE  NORMAL
ORCL:DISK12               CLOSED  PROVISIONED  ONLINE  NORMAL
ORCL:DISK10               CLOSED  PROVISIONED  ONLINE  NORMAL
ORCL:DISK1                CACHED  MEMBER       ONLINE  NORMAL
ORCL:DISK2                CACHED  MEMBER       ONLINE  NORMAL
ORCL:DISK3                CACHED  MEMBER       ONLINE  NORMAL
ORCL:DISK4                CACHED  MEMBER       ONLINE  NORMAL
ORCL:DISK5                CACHED  MEMBER       ONLINE  NORMAL
ORCL:DISK6                CACHED  MEMBER       ONLINE  NORMAL

9 rows selected.


SQL> alter diskgroup all mount;  -->手動mount all diskgroup發現mount不起來
alter diskgroup all mount
*
ERROR at line 1:
ORA-15032: not all alterations performed
ORA-15017: diskgroup "VOTING" cannot be mounted
ORA-15040: diskgroup is incomplete
ORA-15017: diskgroup "DATA" cannot be mounted
ORA-15013: diskgroup "DATA" is already mounted


SQL>

drop diskgroup voting including contents ;  -->也無法drop voting diskgroup

*
ERROR at line 1:
ORA-15039: diskgroup not dropped
ORA-15001: diskgroup "VOTING" does not exist or is not mounted

SQL> SELECT name, failgroup FROM v$asm_disk;

NAME                           FAILGROUP
------------------------------ ------------------------------



DISK1                          DISK1
DISK2                          DISK2
DISK3                          DISK3
DISK4                          DISK4
DISK5                          DISK5
DISK6                          DISK6

9 rows selected.


SQL>
--查了一下MOS上面的資訊, 說是要先重新建一個同名的diskgroup, 再drop diskgroup把資訊清乾淨

CREATE DISKGROUP VOTING NORMAL REDUNDANCY
FAILGROUP FG1 DISK 'ORCL:DISK10'
FAILGROUP FG2 DISK 'ORCL:DISK11' ,'ORCL:DISK12'
ATTRIBUTE 'au_size'='4M',
          'compatible.asm' = '11.2',
          'compatible.rdbms' = '11.2';

SQL> drop diskgroup VOTING;


CREATE DISKGROUP NEWVOTING NORMAL REDUNDANCY
FAILGROUP FG1 DISK 'ORCL:DISK10'
FAILGROUP FG2 DISK 'ORCL:DISK11'
FAILGROUP FG3 DISK 'ORCL:DISK12'
ATTRIBUTE 'au_size'='4M',
          'compatible.asm' = '11.2',
          'compatible.rdbms' = '11.2';

Diskgroup created.

/usr/sbin/oracleasm createdisk disk13 /dev/sdk1

CREATE DISKGROUP VOTING EXTERNAL REDUNDANCY   -->重新建一個external redundancy的voting disk
DISK 'ORCL:DISK13'
ATTRIBUTE 'au_size'='4M',
          'compatible.asm' = '11.2',
          'compatible.rdbms' = '11.2';

As root:

/u01/app/12.1.0/grid/bin/crsctl replace votedisk +VOTING
[root@db1 ~]# /u01/app/12.1.0/grid/bin/crsctl replace votedisk +VOTING
Successful addition of voting disk 72523eaa74914f02bffd7ab3373e6ecb.
Successfully replaced voting disk group with +VOTING.
CRS-4266: Voting file(s) successfully replaced


/u01/app/12.1.0/grid/bin/crsctl query css votedisk

[root@db1 ~]# /u01/app/12.1.0/grid/bin/crsctl query css votedisk
##  STATE    File Universal Id                File Name Disk group
--  -----    -----------------                --------- ---------
1. ONLINE   72523eaa74914f02bffd7ab3373e6ecb (ORCL:DISK13) [VOTING]
Located 1 voting disk(s).
[root@db1 ~]#

/u01/app/12.1.0/grid/bin/crsctl replace votedisk +NEWVOTING
[root@db1 ~]# /u01/app/12.1.0/grid/bin/crsctl replace votedisk +NEWVOTING
Successful addition of voting disk 2b3844998d1a4fc3bfc31b71e92117fc.
Successful addition of voting disk 52c64ccac12e4f01bf30708fd3dda186.
Successful addition of voting disk 2bf78e0bff894f4dbf1bf8bebcce2a17.
Successful deletion of voting disk 72523eaa74914f02bffd7ab3373e6ecb.
Successfully replaced voting disk group with +NEWVOTING.
CRS-4266: Voting file(s) successfully replaced

[root@db1 ~]#  /u01/app/12.1.0/grid/bin/crsctl query css votedisk
##  STATE    File Universal Id                File Name Disk group
--  -----    -----------------                --------- ---------
1. ONLINE   2b3844998d1a4fc3bfc31b71e92117fc (ORCL:DISK10) [NEWVOTING]
2. ONLINE   52c64ccac12e4f01bf30708fd3dda186 (ORCL:DISK11) [NEWVOTING]
3. ONLINE   2bf78e0bff894f4dbf1bf8bebcce2a17 (ORCL:DISK12) [NEWVOTING]
Located 3 voting disk(s).
[root@db1 ~]#

------------------------------------------------------------------------
在環境都建立好以後(ASM Diskgroup NEWVOTING 建立三個FG Group : FG1, FG2, FG3)

以下再重新破壞一次測試

col path for a25
col failgroup  for a10
select path, MOUNT_STATUS, HEADER_STATUS, MODE_STATUS, STATE, failgroup from v$asm_disk;
PATH                      MOUNT_S HEADER_STATU MODE_ST STATE    FAILGROUP
------------------------- ------- ------------ ------- -------- ----------
ORCL:DISK1                CACHED  MEMBER       ONLINE  NORMAL   DISK1
ORCL:DISK10               CACHED  MEMBER       ONLINE  NORMAL   FG1   --> /dev/sdi1
ORCL:DISK11               CACHED  MEMBER       ONLINE  NORMAL   FG2   --> /dev/sdj1
ORCL:DISK12               CACHED  MEMBER       ONLINE  NORMAL   FG3   --> /dev/sdh1
ORCL:DISK2                CACHED  MEMBER       ONLINE  NORMAL   DISK2
ORCL:DISK3                CACHED  MEMBER       ONLINE  NORMAL   DISK3
ORCL:DISK4                CACHED  MEMBER       ONLINE  NORMAL   DISK4
ORCL:DISK5                CACHED  MEMBER       ONLINE  NORMAL   DISK5
ORCL:DISK6                CACHED  MEMBER       ONLINE  NORMAL   DISK6
ORCL:DISK13               CACHED  MEMBER       ONLINE  NORMAL   DISK13

10 rows selected.

[root@db1 ~]# dd if=/dev/zero of=/dev/sdi1 bs=1M count=20
20+0 records in
20+0 records out
20971520 bytes (21 MB) copied, 0.0299748 s, 700 MB/s
[root@db1 ~]# oracleasm scandisks
Reloading disk partitions: done
Cleaning any stale ASM disks...
Cleaning disk "DISK10"
Scanning system for ASM disks...
[root@db1 ~]# oracleasm listdisks
DISK1
DISK11
DISK12
DISK13
DISK2
DISK3
DISK4
DISK5
DISK6

重開機db1之後, 可以觀察到資料庫還是正常啟動
[grid@db1 ~]$ srvctl status database -d msg
Instance msg1 is running on node db1
Instance msg2 is running on node db2

dd if=/dev/zero of=/dev/sdj1 bs=1M count=20

重開機db2之後, 發現db2起不來
[grid@db1 ~]$ srvctl status database -d msg
Instance msg1 is running on node db1
Instance msg2 is not running on node db2

[root@db2 ~]# ps -ef |grep pmon
root      7015  6994  0 15:30 pts/0    00:00:00 grep pmon

接下來使用手動的方式, 將voting disk從NEWVOTING DISKGROUP移動到VOTING DISKGROUP
[root@db1 ~]# /u01/app/12.1.0/grid/bin/crsctl query css votedisk
##  STATE    File Universal Id                File Name Disk group
--  -----    -----------------                --------- ---------
1. ONLINE   c368d52024be4ff9bf626e75dea5d8cf (ORCL:DISK13) [VOTING]
Located 1 voting disk(s).
[root@db1 ~]#

然後再度重開機db2, 就發現資料庫已經修好了, done

[grid@db1 ~]$ srvctl status database -d msg
Instance msg1 is running on node db1
Instance msg2 is running on node db2

dd if=/dev/zero of=/dev/sdi1 bs=1M count=20
dd if=/dev/zero of=/dev/sdj1 bs=1M count=20
dd if=/dev/zero of=/dev/sdh1 bs=1M count=20

/usr/sbin/oracleasm scandisks
/usr/sbin/oracleasm createdisk disk10 /dev/sdh1
/usr/sbin/oracleasm createdisk disk11 /dev/sdi1
/usr/sbin/oracleasm createdisk disk12 /dev/sdj1

[root@db1 ~]# su - grid
[grid@db1 ~]$ sqlplus / as sysasm

SQL*Plus: Release 12.1.0.2.0 Production on Wed Jan 6 15:39:32 2016

Copyright (c) 1982, 2014, Oracle.  All rights reserved.


Connected to:
Oracle Database 12c Enterprise Edition Release 12.1.0.2.0 - 64bit Production
With the Real Application Clusters and Automatic Storage Management options

SQL>


CREATE DISKGROUP NEWVOTING NORMAL REDUNDANCY
FAILGROUP FG1 DISK 'ORCL:DISK10'
FAILGROUP FG2 DISK 'ORCL:DISK11'
FAILGROUP FG3 DISK 'ORCL:DISK12'
ATTRIBUTE 'au_size'='4M',
          'compatible.asm' = '11.2',
          'compatible.rdbms' = '11.2';

[root@db1 u01]# /u01/app/12.1.0/grid/bin/crsctl replace votedisk +NEWVOTING
Successful addition of voting disk a19e49f96ed24f21bf75bfc4ed3e395a.
Successful addition of voting disk 8cd3b1dd3c5c4f51bf9e31dc08760059.
Successful addition of voting disk cc7de87e4ce74f98bfb9463ba63445f1.
Successful deletion of voting disk 5e3c4d6773174fb1bf8344c7d3efa35d.
Successfully replaced voting disk group with +NEWVOTING.
CRS-4266: Voting file(s) successfully replaced

以上修復完成

心得就是, 不論有幾個voting disk, 在voting disk毀損的情況下, 各個線上節點正在執行的instance不會受影響;
受影響的是下一次開機時, ASM/clusterware因無法偵測到足夠的voting disk票數, 就不會讓資料庫開起來

然後, 如果你的環境有三份voting disk , 要把其中一份voting放在NAS上面的方法為:
crsctl add css votedisk /voting_disk/vote_3    #http://www.oracle.com/technetwork/database/clusterware/overview/grid-infra-thirdvoteonnfs-131158.pdf

ref:
http://jaychu649.blogspot.tw/2015/11/11g-racocr-voring.html
http://jaychu649.blogspot.tw/2015/12/oracle-11g-rac-normal-redundancy-voting.html

沒有留言:

LinkWithin-相關文件

Related Posts Plugin for WordPress, Blogger...