Tue Dec 8 09:56:26 PST 2009
On Dec 8, 2009, at 1:33 AM, Maxim Boguk wrote:
> I have a somewhat uncommon configuration now:
> Master: PostgreSQL 8.3
> Slave: PostgreSQL 8.4
> Slony: version 1.2.17
>
You know what is funny - I *JUST* ran into this problem too!
It only seems to happen when not specifying "only on xxx" (slonik's
EXECUTE ONLY ON option), i.e. when the script executes on all nodes.
Here's a **HACK** solution to get your replication going again:
create or replace function _replication.altertableforreplication(int)
returns int
as $$
declare
    p_tab_id      alias for $1;
    v_no_id       int4;
    v_tab_row     record;
    v_tab_fqname  text;
    v_tab_attkind text;
    v_n           int4;
    v_trec        record;
    v_tgbad       boolean;
begin
    -- ----
    -- Grab the central configuration lock
    -- ----
    lock table "_replication".sl_config_lock;

    -- ----
    -- Get our local node ID
    -- ----
    v_no_id := "_replication".getLocalNodeId('_replication');

    -- ----
    -- Get the sl_table row and the current origin of the table.
    -- Verify that the table currently is NOT in altered state.
    -- ----
    select T.tab_reloid, T.tab_set, T.tab_idxname, T.tab_altered,
           S.set_origin, PGX.indexrelid,
           "_replication".slon_quote_brute(PGN.nspname) || '.' ||
           "_replication".slon_quote_brute(PGC.relname) as tab_fqname
        into v_tab_row
        from "_replication".sl_table T, "_replication".sl_set S,
             "pg_catalog".pg_class PGC, "pg_catalog".pg_namespace PGN,
             "pg_catalog".pg_index PGX, "pg_catalog".pg_class PGXC
        where T.tab_id = p_tab_id
          and T.tab_set = S.set_id
          and T.tab_reloid = PGC.oid
          and PGC.relnamespace = PGN.oid
          and PGX.indrelid = T.tab_reloid
          and PGX.indexrelid = PGXC.oid
          and PGXC.relname = T.tab_idxname
        for update;
    if not found then
        raise exception 'Slony-I: alterTableForReplication(): Table with id % not found', p_tab_id;
    end if;
    v_tab_fqname = v_tab_row.tab_fqname;
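    -- ----
    -- THE HACK: a table that is already flagged as altered is reported
    -- with a notice and skipped, instead of raising an exception
    -- (see the note after the function body).
    -- ----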
    if v_tab_row.tab_altered then
        raise notice 'Slony-I: alterTableForReplication(): Table % is already in altered state',
            v_tab_fqname;
        return p_tab_id;
    end if;
    v_tab_attkind := "_replication".determineAttKindUnique(
        v_tab_row.tab_fqname, v_tab_row.tab_idxname);
    execute 'lock table ' || v_tab_fqname || ' in access exclusive mode';

    -- ----
    -- Procedures are different on origin and subscriber
    -- ----
    if v_no_id = v_tab_row.set_origin then
        -- ----
        -- On the origin we add the log trigger to the table and are done
        -- ----
        execute 'create trigger "_replication_logtrigger_' ||
                p_tab_id::text ||
                '" after insert or update or delete on ' ||
                v_tab_fqname ||
                ' for each row execute procedure "_replication".logTrigger (''_replication'', ''' ||
                p_tab_id::text || ''', ''' || v_tab_attkind || ''');';
    else
        -- ----
        -- On the subscriber the thing is a bit more difficult. We want
        -- to disable all user- and foreign key triggers and rules.
        -- ----

        -- ----
        -- Check to see if there are any trigger conflicts...
        -- ----
        v_tgbad := false;
        for v_trec in
            select pc.relname, tg1.tgname
                from "pg_catalog".pg_trigger tg1,
                     "pg_catalog".pg_trigger tg2,
                     "pg_catalog".pg_class pc,
                     "pg_catalog".pg_index pi,
                     "_replication".sl_table tab
                where tg1.tgname = tg2.tgname        -- trigger names match
                  and tg1.tgrelid = tab.tab_reloid   -- trigger 1 is on the table
                  and pi.indexrelid = tg2.tgrelid    -- trigger 2 is on the index
                  and pi.indrelid = tab.tab_reloid   -- the index's table is this table
                  and pc.oid = tab.tab_reloid
        loop
            raise notice 'Slony-I: alterTableForReplication(): multiple instances of trigger % on table %',
                v_trec.tgname, v_trec.relname;
            v_tgbad := true;
        end loop;
        if v_tgbad then
            raise exception 'Slony-I: Unable to disable triggers';
        end if;

        -- ----
        -- Disable all existing triggers
        -- ----
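        -- (Slony 1.2's disable trick: instead of dropping the triggers,
        -- it parks them by repointing tgrelid at the table's key index
        -- relation, where they no longer fire but can be put back later
        -- by alterTableRestore(). The rules below get the same treatment
        -- via ev_class.)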
update "pg_catalog".pg_trigger
set tgrelid = v_tab_row.indexrelid
where tgrelid = v_tab_row.tab_reloid
and not exists (
select true from "_replication".sl_table TAB,
"_replication".sl_trigger TRIG
where TAB.tab_reloid = tgrelid
and TAB.tab_id = TRIG.trig_tabid
and TRIG.trig_tgname = tgname
);
get diagnostics v_n = row_count;
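        -- (pg_class.reltriggers no longer exists in PostgreSQL 8.4, so
        -- the trigger counter is only maintained where the column is
        -- present - hence the information_schema check below.)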
        if (v_n > 0) and exists (
                select 1 from information_schema.columns
                where table_name = 'pg_class'
                  and table_schema = 'pg_catalog'
                  and column_name = 'reltriggers') then
            update "pg_catalog".pg_class
                set reltriggers = reltriggers - v_n
                where oid = v_tab_row.tab_reloid;
        end if;

        -- ----
        -- Disable all existing rules
        -- ----
        update "pg_catalog".pg_rewrite
            set ev_class = v_tab_row.indexrelid
            where ev_class = v_tab_row.tab_reloid;
        get diagnostics v_n = row_count;
        if v_n > 0 then
            update "pg_catalog".pg_class
                set relhasrules = false
                where oid = v_tab_row.tab_reloid;
        end if;

        -- ----
        -- Add the trigger that denies write access to replicated tables
        -- ----
        execute 'create trigger "_replication_denyaccess_' ||
                p_tab_id::text ||
                '" before insert or update or delete on ' ||
                v_tab_fqname ||
                ' for each row execute procedure "_replication".denyAccess (''_replication'');';
    end if;

    -- ----
    -- Mark the table altered in our configuration
    -- ----
    update "_replication".sl_table
        set tab_altered = true where tab_id = p_tab_id;
    return p_tab_id;
end;
$$ language plpgsql;
You'll need to change _replication to _slony (your cluster name)
throughout. In a nutshell, it makes the table already being in the
altered state a non-error condition, changing that EXCEPTION into a
NOTICE.
This is a hack solution until a proper fix is worked up - I was about
to fire up some test instances and trace what happens.
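If you want to see which tables are stuck in the mismatched state
before (or after) loading the function, something along these lines
should show it. This is just a sketch, not Slony code - it assumes
your cluster is named _slony, so the deny-access triggers on the
subscriber are named _slony_denyaccess_<tab_id>, matching the create
trigger above. Run it on the slave:

-- Sketch (assumes cluster name "_slony"): compare each table's
-- tab_altered flag against the actual presence of its deny-access
-- trigger; rows where the two disagree are the ones that make
-- alterTableForReplication()/alterTableRestore() blow up.
select t.tab_id,
       n.nspname || '.' || c.relname as tab_fqname,
       t.tab_altered,
       exists (select 1
                 from "pg_catalog".pg_trigger tg
                where tg.tgrelid = t.tab_reloid
                  and tg.tgname = '_slony_denyaccess_' || t.tab_id::text
              ) as denyaccess_present
  from "_slony".sl_table t
  join "pg_catalog".pg_class c on c.oid = t.tab_reloid
  join "pg_catalog".pg_namespace n on n.oid = c.relnamespace
 order by t.tab_id;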
> Any execute script surely breaks the replication set. Even if I submit
> something like 'select 1' via execute script, replication will be
> broken beyond repair. The only way to fix it is to drop/create/subscribe
> the slave node.
> Restarting slon doesn't help at all...
>
> Here is slony log from slave database:
>
> 2009-12-08 09:11:43 MSK INFO prepare for DDL script - set:1
> onlyonnode:-1
> 2009-12-08 09:11:43 MSK ERROR remoteWorkerThread_1: "select
> "_slony".ddlScript_prepare_int(1, -1); " PGRES_FATAL_ERROR ERROR:
> Slony-I: alterTableRestore(): Table "public"."access_type" is not in
> altered state
> CONTEXT: SQL statement "SELECT "_slony".alterTableRestore( $1 )"
> PL/pgSQL function "ddlscript_prepare_int" line 46 at PERFORM
> 2009-12-08 09:11:43 MSK ERROR remoteWorkerThread_1: DDL preparation
> failed - set 1 - only on node -1
> 2009-12-08 09:11:43 MSK DEBUG1 slon: retry requested
> 2009-12-08 09:11:43 MSK INFO remoteListenThread_1: disconnecting
> from 'dbname=plus1 host=192.168.1.137 port=5432 user=slony'
> 2009-12-08 09:11:43 MSK DEBUG1 syncThread: thread done
> 2009-12-08 09:11:43 MSK DEBUG1 localListenThread: thread done
> 2009-12-08 09:11:43 MSK DEBUG1 remoteListenThread_1: thread done
> 2009-12-08 09:11:43 MSK DEBUG1 main: scheduler mainloop returned
> 2009-12-08 09:11:43 MSK DEBUG1 cleanupThread: thread done
> 2009-12-08 09:11:43 MSK DEBUG1 main: done
> 2009-12-08 09:11:43 MSK DEBUG1 slon: restart of worker
> 2009-12-08 09:11:43 MSK CONFIG main: slon version 1.2.17 starting up
> 2009-12-08 09:11:43 MSK CONFIG main: local node id = 2
> 2009-12-08 09:11:43 MSK CONFIG main: launching sched_start_mainloop
> 2009-12-08 09:11:43 MSK CONFIG main: loading current cluster
> configuration
> 2009-12-08 09:11:43 MSK CONFIG storeNode: no_id=1
> no_comment='director'
> 2009-12-08 09:11:43 MSK CONFIG storePath: pa_server=1 pa_client=2
> pa_conninfo="dbname=plus1 host=192.168.1.137 port=5432 user=slony"
> pa_connretry=10
> 2009-12-08 09:11:43 MSK CONFIG storeListen: li_origin=1 li_receiver=2
> li_provider=1
> 2009-12-08 09:11:43 MSK CONFIG storeSet: set_id=1 set_origin=1
> set_comment='A replication set so boring no one thought to give it a
> name'
> 2009-12-08 09:11:43 MSK WARN remoteWorker_wakeup: node 1 - no
> worker thread
> 2009-12-08 09:11:43 MSK CONFIG storeSubscribe: sub_set=1
> sub_provider=1 sub_forward='t'
> 2009-12-08 09:11:43 MSK WARN remoteWorker_wakeup: node 1 - no
> worker thread
> 2009-12-08 09:11:43 MSK CONFIG enableSubscription: sub_set=1
> 2009-12-08 09:11:43 MSK WARN remoteWorker_wakeup: node 1 - no
> worker thread
> 2009-12-08 09:11:43 MSK CONFIG main: configuration complete -
> starting threads
> 2009-12-08 09:11:43 MSK DEBUG1 localListenThread: thread starts
> NOTICE: Slony-I: cleanup stale sl_nodelock entry for pid=31722
> 2009-12-08 09:11:43 MSK CONFIG enableNode: no_id=1
> 2009-12-08 09:11:43 MSK DEBUG1 remoteListenThread_1: thread starts
> 2009-12-08 09:11:43 MSK DEBUG1 remoteWorkerThread_1: thread starts
> 2009-12-08 09:11:43 MSK DEBUG1 cleanupThread: thread starts
> 2009-12-08 09:11:43 MSK DEBUG1 syncThread: thread starts
> 2009-12-08 09:11:43 MSK DEBUG1 main: running scheduler mainloop
> 2009-12-08 09:11:43 MSK DEBUG1 remoteWorkerThread_1: helper thread for
> provider 1 created
> 2009-12-08 09:11:43 MSK DEBUG1 remoteListenThread_1: connected to
> 'dbname=plus1 host=192.168.1.137 port=5432 user=slony'
> 2009-12-08 09:11:43 MSK INFO Checking local node id
> 2009-12-08 09:11:43 MSK INFO Found local node id
> 2009-12-08 09:11:43 MSK INFO prepare for DDL script - set:1
> onlyonnode:-1
> 2009-12-08 09:11:43 MSK ERROR remoteWorkerThread_1: "select
> "_slony".ddlScript_prepare_int(1, -1); " PGRES_FATAL_ERROR ERROR:
> Slony-I: alterTableRestore(): Table "public"."access_type" is not in
> altered state
> CONTEXT: SQL statement "SELECT "_slony".alterTableRestore( $1 )"
> PL/pgSQL function "ddlscript_prepare_int" line 46 at PERFORM
> 2009-12-08 09:11:43 MSK ERROR remoteWorkerThread_1: DDL preparation
> failed - set 1 - only on node -1
> 2009-12-08 09:11:43 MSK DEBUG1 slon: retry requested
> 2009-12-08 09:11:43 MSK INFO remoteListenThread_1: disconnecting
> from 'dbname=plus1 host=192.168.1.137 port=5432 user=slony'
> 2009-12-08 09:11:43 MSK DEBUG1 remoteListenThread_1: thread done
> 2009-12-08 09:11:43 MSK DEBUG1 localListenThread: thread done
> 2009-12-08 09:11:43 MSK DEBUG1 syncThread: thread done
> 2009-12-08 09:11:43 MSK DEBUG1 cleanupThread: thread done
> 2009-12-08 09:11:43 MSK DEBUG1 main: scheduler mainloop returned
> 2009-12-08 09:11:43 MSK DEBUG1 main: done
> 2009-12-08 09:11:43 MSK DEBUG1 slon: restart of worker
> 2009-12-08 09:11:43 MSK CONFIG main: slon version 1.2.17 starting up
> 2009-12-08 09:11:43 MSK CONFIG main: local node id = 2
> 2009-12-08 09:11:43 MSK CONFIG main: launching sched_start_mainloop
> 2009-12-08 09:11:43 MSK CONFIG main: loading current cluster
> configuration
> 2009-12-08 09:11:43 MSK CONFIG storeNode: no_id=1
> no_comment='director'
> 2009-12-08 09:11:43 MSK CONFIG storePath: pa_server=1 pa_client=2
> pa_conninfo="dbname=plus1 host=192.168.1.137 port=5432 user=slony"
> pa_connretry=10
> 2009-12-08 09:11:43 MSK CONFIG storeListen: li_origin=1 li_receiver=2
> li_provider=1
> 2009-12-08 09:11:43 MSK CONFIG storeSet: set_id=1 set_origin=1
> set_comment='A replication set so boring no one thought to give it a
> name'
> 2009-12-08 09:11:43 MSK WARN remoteWorker_wakeup: node 1 - no
> worker thread
> 2009-12-08 09:11:43 MSK CONFIG storeSubscribe: sub_set=1
> sub_provider=1 sub_forward='t'
> 2009-12-08 09:11:43 MSK WARN remoteWorker_wakeup: node 1 - no
> worker thread
> 2009-12-08 09:11:43 MSK CONFIG enableSubscription: sub_set=1
> 2009-12-08 09:11:43 MSK WARN remoteWorker_wakeup: node 1 - no
> worker thread
> 2009-12-08 09:11:43 MSK CONFIG main: configuration complete -
> starting threads
> 2009-12-08 09:11:43 MSK DEBUG1 localListenThread: thread starts
> 2009-12-08 09:11:43 MSK FATAL localListenThread: "select
> "_slony".cleanupNodelock(); insert into "_slony".sl_nodelock values (
> 2, 0, "pg_catalog".pg_backend_pid()); " - ERROR: duplicate key
> value violates unique constraint "sl_nodelock-pkey"
>
> 2009-12-08 09:11:43 MSK DEBUG1 slon: shutdown requested
> 2009-12-08 09:12:03 MSK DEBUG1 slon: child termination timeout -
> kill child
> 2009-12-08 09:12:03 MSK DEBUG1 slon: done
>
>
> --
> =======================================================
> I'm on VKontakte: http://vkontakte.ru/id16323414
> Strength breaks straw, but not everything in our
> life is straw, and strength is far from everything.
--
Jeff Trout <jeff at jefftrout.com>
http://www.stuarthamm.net/
http://www.dellsmartexitin.com/