1 production 1.1 /home/production/cvs/JSOC/doc/dcs_warmstandby.txt
2
3 NOTICE: These are not blind step-by-step procedures. You must understand
4 what is being done so that you can adapt the directions to exactly
5 what you want to do!
6
7 $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
8
9 !!!BEGIN: To set up warm-standby for dcs0 on dcs2::
10 Assumes no DB for dcs0 is running on dcs2. If one is running, stop it:
11 pg_ctl -D /var/lib/pgsql/dcs0_data/ stop -m fast
12 Make sure there is no /tmp/pgsql.trigger.5430 file on dcs2
13
14 Login to dcs0 as postgres
15 > ls -1 / | grep WALarchive | wc -l
16 1
17
18 NOTE: the original dcs0 will return 1. Orig dcs2 converted to dcs0 returns 0.
19
20 If (0) then
21 cp configfiles/postgresql.conf.dcs0.0 dcs0_data/postgresql.conf
22 production 1.1 rm -f /data/WALarchive/5430/*
23
24 if (1) then
25 cp configfiles/postgresql.conf.dcs0.1 dcs0_data/postgresql.conf
26 rm -f /WALarchive/5430/*
27
28 Restart the dcs0 database to create WAL archives:
29 > pg_ctl -D /var/lib/pgsql/dcs0_data -m fast stop
30 > pg_ctl -D /var/lib/pgsql/dcs0_data/ start
31
32 Begin the backup process on dcs0:
33 > psql -p 5430 -c "select pg_start_backup('2010-03-15')"
34
35 Create a tar archive of the system data directory:
36 > tar -czvf dcs0_backup.tar.gz dcs0_data/
37
38 Stop the backup process on dcs0:
39 > psql -p 5430 -c "select pg_stop_backup()"
40
41 Copy the archive file from dcs0 to dcs2:
42 > scp dcs0_backup.tar.gz postgres@dcs2:/var/lib/pgsql/
43 production 1.1
44 Now login to dcs2 as postgres:
45 > rm -rf dcs0_data
46 > rm -rf dcs1_data (do NOT do this if you are only setting up the warm-standby for dcs0??)
47
48 Uncompress the archive file:
49 > tar -xzvf dcs0_backup.tar.gz
50
51 > ls -1 / | grep WALarchive | wc -l
52 0
53
54 if(0) then
55 cp configfiles/recovery.conf.dcs0.0 dcs0_data/recovery.conf
56 cp configfiles/postgresql.conf.dcs0.ws dcs0_data/postgresql.conf
57 if(1) then
58 cp configfiles/recovery.conf.dcs0.1 dcs0_data/recovery.conf
59 cp configfiles/postgresql.conf.dcs0.ws dcs0_data/postgresql.conf
60
61 Remove the postmaster.pid file:
62 > rm dcs0_data/postmaster.pid
63
64 production 1.1 > chmod 700 /var/lib/pgsql/dcs0_data/
65
66 Start the dcs0 warm standby server, execute the command:
67 > pg_ctl -D /var/lib/pgsql/dcs0_data/ start
68
69 The server should go into recovery mode and stay there.
70 After everything looks good, remove the tar file from dcs2:
71 > rm dcs0_backup.tar.gz
72 !!!END: To set up warm-standby for dcs0 on dcs2::
73 ==========================================================================
74
75
76 !!!BEGIN: To set up warm-standby for dcs1 on dcs2::
77 Assumes no DB for dcs1 is running on dcs2. If one is running, stop it:
78 pg_ctl -D /var/lib/pgsql/dcs1_data/ stop -m fast
79 Make sure there is no /tmp/pgsql.trigger.5431 file on dcs2
80
81 Login to dcs1 as postgres
82 > ls -1 / | grep WALarchive | wc -l
83 1
84
85 production 1.1 NOTE: the original dcs1 will return 1. Orig dcs2 converted to dcs1 returns 0.
86
87 If (0) then
88 cp configfiles/postgresql.conf.dcs1.0 dcs1_data/postgresql.conf
89 rm -f /data/WALarchive/5431/*
90
91 if (1) then
92 cp configfiles/postgresql.conf.dcs1.1 dcs1_data/postgresql.conf
93 rm -f /WALarchive/5431/*
94
95 Restart the dcs1 database to create WAL archives:
96 > pg_ctl -D /var/lib/pgsql/dcs1_data -m fast stop
97 > pg_ctl -D /var/lib/pgsql/dcs1_data/ start
98
99 Begin the backup process on dcs1:
100 > psql -p 5431 -c "select pg_start_backup('2010-03-15')"
101
102 Create a tar archive of the system data directory:
103 > tar -czvf dcs1_backup.tar.gz dcs1_data/
104
105 Stop the backup process on dcs1:
106 production 1.1 > psql -p 5431 -c "select pg_stop_backup()"
107
108 Copy the archive file from dcs1 to dcs2:
109 > scp dcs1_backup.tar.gz postgres@dcs2:/var/lib/pgsql/
110
111 Now login to dcs2 as postgres:
112 > rm -rf dcs1_data
113
114 Uncompress the archive file:
115 > tar -xzvf dcs1_backup.tar.gz
116
117 > ls -1 / | grep WALarchive | wc -l
118 0
119
120 if(0) then
121 cp configfiles/recovery.conf.dcs1.0 dcs1_data/recovery.conf
122 cp configfiles/postgresql.conf.dcs1.ws dcs1_data/postgresql.conf
123 if(1) then
124 cp configfiles/recovery.conf.dcs1.1 dcs1_data/recovery.conf
125 cp configfiles/postgresql.conf.dcs1.ws dcs1_data/postgresql.conf
126
127 production 1.1 Remove the postmaster.pid file:
128 > rm dcs1_data/postmaster.pid
129
130 > chmod 700 /var/lib/pgsql/dcs1_data/
131
132 Start the dcs1 warm standby server, execute the command:
133 > pg_ctl -D /var/lib/pgsql/dcs1_data/ start
134
135 The server should go into recovery mode and stay there.
136 After everything looks good, remove the tar file from dcs2:
137 > rm dcs1_backup.tar.gz
138 !!!END: To set up warm-standby for dcs1 on dcs2::
139 ==========================================================================
140
141 !!!BEGIN: dcs0 fails. Switch over to warm-standby on dcs2:
142 Ensure that the database on the dcs0 server is down:
143 > pg_ctl -D /var/lib/pgsql/dcs0_data -m fast stop
144
145 Login to dcs2 as postgres.
146 Make sure no dcs1 warm-standby is running:
147 > pg_ctl -D /var/lib/pgsql/dcs1_data/ stop -m fast
148 production 1.1
149 To bring the dcs0 warm standby out of recovery mode and make it live:
150 NOTE: run psql -p 5430 -l before and after this to see the db come online
151 > touch /tmp/pgsql.trigger.5430
152
153 !!TBD find out when to rm /tmp/pgsql.trigger.5430
154
155 Now make dcs2 the new dcs0. Run as root:
156 (MAKE sure the old dcs0 is down. Do not bring it up again while dcs2 is acting as dcs0, or there will be 2 dcs0's on the network)
157 > /etc/sysconfig/network/AIA-up
158
159 To restore the machine's network identity to dcs2 again:
160 > /etc/sysconfig/network/MYSELF-up
161
162
163 As user production on the new dcs0:
164 > vncserver -geometry 1280x1024
165 On j0:
166 j0:/home/production> vncviewer
167 When prompted for the VNC server, answer dcs0.jsoc.Stanford.EDU:1
168
169 production 1.1 Use crontab -e to install the entries from /home/production/crontab.dcs0
170
|