Jan Wieck wieck at lists.slony.info
Wed May 28 12:46:48 PDT 2008
Fixed a problem where ACCEPT_SET would wait for the corresponding
MOVE_SET or FAILOVER_SET to arrive while holding an exclusive lock
on sl_config_lock, preventing the other remote worker from processing
that event.


*** 286,290 ****
  	SlonDString query1;
  	SlonDString query2;
! 	SlonDString lsquery;
  	SlonWorkMsg *msg;
  	SlonWorkMsg_event *event;
--- 286,290 ----
  	SlonDString query1;
  	SlonDString query2;
! 	SlonDString query3;
  	SlonWorkMsg *msg;
  	SlonWorkMsg_event *event;
*** 294,297 ****
--- 294,298 ----
  	bool		event_ok;
  	bool		need_reloadListen = false;
+ 	char		conn_symname[32];
*** 305,309 ****
  	if (wd == 0)
! 		slon_log(SLON_ERROR, "remoteWorkerThread_%d: could not malloc() space for WorkerGroupData\n");
--- 306,311 ----
  	if (wd == 0)
! 		slon_log(SLON_ERROR, "remoteWorkerThread_%d: could not malloc() space for WorkerGroupData\n",
! 				node->no_id);
*** 329,338 ****
! 	dstring_init(&lsquery);
  	 * Connect to the local database
! 	if ((local_conn = slon_connectdb(rtcfg_conninfo, "remote_worker")) == NULL)
  	local_dbconn = local_conn->dbconn;
--- 331,341 ----
! 	dstring_init(&query3);
  	 * Connect to the local database
! 	sprintf(conn_symname, "remoteWorkerThread_%d", node->no_id);
! 	if ((local_conn = slon_connectdb(rtcfg_conninfo, conn_symname)) == NULL)
  	local_dbconn = local_conn->dbconn;
*** 1009,1016 ****
  					while (PQntuples(res) == 0)
  						slon_log(SLON_DEBUG1, "ACCEPT_SET - MOVE_SET or FAILOVER_SET not received yet - sleep\n");
  						if (sched_msleep(node, 10000) != SCHED_STATUS_OK)
! 						PQclear(res);
  						res = PQexec(local_dbconn, dstring_data(&query2));
--- 1012,1039 ----
  					while (PQntuples(res) == 0)
+ 						PQclear(res);
  						slon_log(SLON_DEBUG1, "ACCEPT_SET - MOVE_SET or FAILOVER_SET not received yet - sleep\n");
+ 						/* Rollback the transaction for now */
+ 						(void) slon_mkquery(&query3, "rollback transaction");
+ 						if (query_execute(node, local_dbconn, &query3) < 0)
+ 							slon_retry();
+ 						/* Sleep */
  						if (sched_msleep(node, 10000) != SCHED_STATUS_OK)
! 						/* Start the transaction again */
! 						(void) slon_mkquery(&query3,
! 							"begin transaction; "
! 							"set transaction isolation level serializable; ");
! 						slon_appendquery(&query1,
! 							 "lock table %s.sl_config_lock; ",
! 							 rtcfg_namespace);
! 						if (query_execute(node, local_dbconn, &query3) < 0)
! 							slon_retry();
! 						/* See if we have the missing event now */
  						res = PQexec(local_dbconn, dstring_data(&query2));

*** 147,150 ****
--- 147,168 ----
+ 	if (slon_log_level >= SLON_DEBUG1)
+ 	{
+ 		slon_mkquery(&query, "select pg_backend_pid()");
+ 		res = PQexec(dbconn, dstring_data(&query));
+ 		if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
+ 		{
+ 			slon_log(SLON_ERROR, "%s: Unable to get backend pid - %s\n",
+ 				symname, PQresultErrorMessage(res));
+ 		}
+ 		else
+ 		{
+ 			slon_log(SLON_DEBUG1, "%s \"%s\": backend pid = %s\n",
+ 				symname, conninfo, PQgetvalue(res, 0, 0));
+ 		}
+ 		PQclear(res);
+ 	}
  	return conn;

