Fri Aug 1 13:18:19 PDT 2008
- Previous message: [Slony1-commit] slony1-engine/src/slon slon.c
- Next message: [Slony1-commit] slony1-engine/src/slon remote_worker.c
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
Update of /home/cvsd/slony1/slony1-engine/src/slon In directory main.slony.info:/tmp/cvs-serv25935 Modified Files: remote_worker.c Log Message: Add in additional performance monitoring, indicating how much time and how many queries are being spent against the provider + subscriber. This should enable people to more readily infer where bottlenecks are themselves, rather than having to go to Jan & Chris and have them infer things *really* indirectly. Index: remote_worker.c =================================================================== RCS file: /home/cvsd/slony1/slony1-engine/src/slon/remote_worker.c,v retrieving revision 1.173 retrieving revision 1.174 diff -C2 -d -r1.173 -r1.174 *** remote_worker.c 27 Jun 2008 20:16:04 -0000 1.173 --- remote_worker.c 1 Aug 2008 20:18:17 -0000 1.174 *************** *** 28,33 **** #define MAXGROUPSIZE 10000 /* What is the largest number of SYNCs we'd ! * want to group together??? */ /* ---------- * Local definitions --- 28,35 ---- #define MAXGROUPSIZE 10000 /* What is the largest number of SYNCs we'd ! * want to group together??? */ ! 
+ void slon_terminate_worker(void); /* ---------- * Local definitions *************** *** 139,142 **** --- 141,161 ---- } WorkGroupLineCode; + typedef struct PerfMon_s PerfMon; /* Structure for doing performance monitoring */ + struct PerfMon_s + { + struct timeval prev_t; + struct timeval now_t; + double prov_query_t; /* Time spent running queries against the provider */ + int prov_query_c; /* Number of queries run against the provider */ + double subscr_query_t; /* Time spent running prep queries against the subscriber */ + int subscr_query_c; /* Number of prep queries run against the subscriber */ + double subscr_iud__t; /* Time spent running IUD against subscriber */ + int subscr_iud__c; /* Number of IUD requests run against subscriber */ + double large_tuples_t; /* Time spent processing large tuples */ + int large_tuples_c; /* Number of large tuples processed */ + int num_inserts; + int num_updates; + int num_deletes; + }; struct ProviderInfo_s *************** *** 241,244 **** --- 260,274 ---- * ---------- */ + /* + * Monitoring data structure + */ + + static void init_perfmon(PerfMon *pm); + static void start_monitored_event(PerfMon *pm); + static void monitor_provider_query(PerfMon *pm); + static void monitor_subscriber_query(PerfMon *pm); + static void monitor_subscriber_iud(PerfMon *pm); + static void monitor_largetuples(PerfMon *pm); + static void adjust_provider_info(SlonNode *node, WorkerGroupData *wd, int cleanup); *************** *** 2103,2112 **** * Move the message to the head of the queue */ ! if (oldmsg != node->message_head) { DLLIST_REMOVE(node->message_head, ! node->message_tail, oldmsg); DLLIST_ADD_HEAD(node->message_head, ! node->message_tail, oldmsg); } } --- 2133,2142 ---- * Move the message to the head of the queue */ ! if ((SlonWorkMsg *) oldmsg != node->message_head) { DLLIST_REMOVE(node->message_head, ! node->message_tail, (SlonWorkMsg *) oldmsg); DLLIST_ADD_HEAD(node->message_head, ! 
node->message_tail, (SlonWorkMsg *) oldmsg); } } *************** *** 2420,2423 **** --- 2450,2470 ---- } } + if (sub_provider < 0) { + rtcfg_unlock(); + slon_log(SLON_ERROR, "remoteWorkerThread_%d: provider node for set %d " + "not found in runtime configuration\n", + node->no_id, set_id); + slon_terminate_worker(); + return -1; + + } + if (set_origin < 0) { + rtcfg_unlock(); + slon_log(SLON_ERROR, "remoteWorkerThread_%d: origin node for set %d " + "not found in runtime configuration\n", + node->no_id, set_id); + slon_terminate_worker(); + return -1; + } if (set == NULL) { *************** *** 2431,2435 **** { rtcfg_unlock(); ! slon_log(SLON_ERROR, "remoteWorkerThread_%d: node %d " "not found in runtime configuration\n", node->no_id, sub_provider); --- 2478,2482 ---- { rtcfg_unlock(); ! slon_log(SLON_ERROR, "remoteWorkerThread_%d: provider node %d " "not found in runtime configuration\n", node->no_id, sub_provider); *************** *** 3499,3502 **** --- 3546,3550 ---- slon_log(SLON_INFO, "copy_set %d done in %.3f seconds\n", set_id, TIMEVAL_DIFF(&tv_start, &tv_now)); + return 0; } *************** *** 3537,3542 **** --- 3585,3592 ---- int actionlist_len; int64 min_ssy_seqno; + PerfMon pm; gettimeofday(&tv_start, NULL); + slon_log(SLON_DEBUG2, "remoteWorkerThread_%d: SYNC " INT64_FORMAT " processing\n", *************** *** 3547,3550 **** --- 3597,3602 ---- dstring_init(&lsquery); + init_perfmon(&pm); + /* * If this slon is running in log archiving mode, open a temporary file *************** *** 3590,3593 **** --- 3642,3647 ---- sprintf(conn_symname, "subscriber_%d_provider_%d", node->no_id, provider->no_id); + + provider->conn = slon_connectdb(provider->pa_conninfo, conn_symname); *************** *** 3610,3613 **** --- 3664,3668 ---- "select %s.registerNodeConnection(%d); ", rtcfg_namespace, rtcfg_nodeid); + start_monitored_event(&pm); if (query_execute(node, provider->conn->dbconn, &query) < 0) { *************** *** 3619,3622 **** --- 3674,3679 ---- return provider->pa_connretry; } + 
monitor_provider_query(&pm); + slon_log(SLON_DEBUG1, "remoteWorkerThread_%d: " "connected to data provider %d on '%s'\n", *************** *** 3718,3722 **** --- 3775,3783 ---- slon_appendquery(&query, "); "); + start_monitored_event(&pm); res1 = PQexec(local_dbconn, dstring_data(&query)); + slon_log(SLON_INFO, "about to monitor_subscriber_query - pulling big actionid list %d\n", provider->no_id); + monitor_subscriber_query(&pm); + if (PQresultStatus(res1) != PGRES_TUPLES_OK) { *************** *** 3773,3777 **** --- 3834,3840 ---- rtcfg_namespace, sub_set); + start_monitored_event(&pm); res2 = PQexec(local_dbconn, dstring_data(&query)); + monitor_subscriber_query(&pm); if (PQresultStatus(res2) != PGRES_TUPLES_OK) { *************** *** 3946,3949 **** --- 4009,4013 ---- rtcfg_namespace); res1 = PQexec(local_dbconn, dstring_data(&query)); + if (PQresultStatus(res1) != PGRES_TUPLES_OK) { *************** *** 4031,4035 **** --- 4095,4102 ---- if (wgline->log.n_used > 0) { + start_monitored_event(&pm); res1 = PQexec(local_dbconn, dstring_data(&(wgline->log))); + + monitor_subscriber_iud(&pm); if (PQresultStatus(res1) == PGRES_EMPTY_QUERY) { *************** *** 4049,4053 **** --- 4116,4122 ---- } + start_monitored_event(&pm); res1 = PQexec(local_dbconn, dstring_data(&(wgline->data))); + monitor_subscriber_iud(&pm); if (PQresultStatus(res1) == PGRES_EMPTY_QUERY) { *************** *** 4228,4232 **** --- 4297,4303 ---- " group by 1; "); + start_monitored_event(&pm); res1 = PQexec(provider->conn->dbconn, dstring_data(&query)); + monitor_provider_query(&pm); if (PQresultStatus(res1) != PGRES_TUPLES_OK) { *************** *** 4252,4255 **** --- 4323,4327 ---- rtcfg_namespace, seql_seqid, node->no_id, seqbuf, seql_last_value); + start_monitored_event(&pm); if (query_execute(node, local_dbconn, &query) < 0) { *************** *** 4260,4263 **** --- 4332,4336 ---- return 60; } + monitor_subscriber_iud(&pm); /* *************** *** 4344,4347 **** --- 4417,4432 ---- node->no_id, 
event->ev_seqno, TIMEVAL_DIFF(&tv_start, &tv_now)); + + slon_log(SLON_INFO, + "remoteWorkerThread_%d: SYNC " INT64_FORMAT " Timing: " + " pqexec (s/count)" + "- provider %.3f/%d " + "- subscriber %.3f/%d " + "- IUD %.3f/%d\n", + node->no_id, event->ev_seqno, + pm.prov_query_t, pm.prov_query_c, + pm.subscr_query_t, pm.subscr_query_c, + pm.subscr_iud__t, pm.subscr_iud__c); + return 0; } *************** *** 4383,4393 **** int line_ncmds; ! int num_inserts, ! num_deletes, ! num_updates; ! num_inserts = 0; ! num_deletes = 0; ! num_updates = 0; dstring_init(&query); --- 4468,4474 ---- int line_ncmds; ! PerfMon pm; ! init_perfmon(&pm); dstring_init(&query); *************** *** 4618,4622 **** /* * First make sure that the overall memory usage is inside ! * bouds. */ if (wd->workdata_largemem > sync_max_largemem) --- 4699,4703 ---- /* * First make sure that the overall memory usage is inside ! * bounds. */ if (wd->workdata_largemem > sync_max_largemem) *************** *** 4759,4762 **** --- 4840,4844 ---- if (log_cmdsize >= sync_max_rowsize) { + start_monitored_event(&pm); (void) slon_mkquery(&query2, "select log_cmddata " *************** *** 4776,4779 **** --- 4858,4862 ---- log_origin, log_txid, log_actionseq); res2 = PQexec(dbconn, dstring_data(&query2)); + monitor_largetuples(&pm); if (PQresultStatus(res2) != PGRES_TUPLES_OK) { *************** *** 4839,4843 **** wd->tab_fqname[log_tableid], log_cmddata); ! num_inserts++; break; --- 4922,4926 ---- wd->tab_fqname[log_tableid], log_cmddata); ! pm.num_inserts++; break; *************** *** 4847,4851 **** wd->tab_fqname[log_tableid], log_cmddata); ! num_updates++; break; --- 4930,4934 ---- wd->tab_fqname[log_tableid], log_cmddata); ! pm.num_updates++; break; *************** *** 4855,4859 **** wd->tab_fqname[log_tableid], log_cmddata); ! num_deletes++; break; } --- 4938,4942 ---- wd->tab_fqname[log_tableid], log_cmddata); ! 
pm.num_deletes++; break; } *************** *** 4959,4967 **** slon_log(SLON_INFO, "remoteHelperThread_%d_%d: inserts=%d updates=%d deletes=%d\n", ! node->no_id, provider->no_id, num_inserts, num_updates, num_deletes); ! num_inserts = 0; ! num_deletes = 0; ! num_updates = 0; /* --- 5042,5052 ---- slon_log(SLON_INFO, "remoteHelperThread_%d_%d: inserts=%d updates=%d deletes=%d\n", ! node->no_id, provider->no_id, pm.num_inserts, pm.num_updates, pm.num_deletes); ! slon_log(SLON_INFO, ! "remoteWorkerThread_%d: sync_helper timing: " ! " large tuples %.3f/%d\n", ! node->no_id, ! pm.large_tuples_t, pm.large_tuples_c); /* *************** *** 5650,5655 **** check_set_subscriber(int set_id, int node_id, PGconn *local_dbconn) { - - SlonDString query1; PGresult *res; --- 5735,5738 ---- *************** *** 5675,5679 **** --- 5758,5806 ---- PQclear(res); return 1; + } + static void init_perfmon(PerfMon *perf_info) { + perf_info->prov_query_t = 0.0; + perf_info->prov_query_c = 0; + perf_info->subscr_query_t = 0.0; + perf_info->subscr_query_c = 0; + perf_info->subscr_iud__t = 0.0; + perf_info->subscr_iud__c = 0; + perf_info->large_tuples_t = 0; + perf_info->large_tuples_c = 0; + perf_info->num_inserts = 0; + perf_info->num_updates = 0; + perf_info->num_deletes = 0; + } + static void start_monitored_event(PerfMon *perf_info) { + gettimeofday(&(perf_info->prev_t), NULL); + } + static void monitor_subscriber_query(PerfMon *perf_info) { + double diff; + gettimeofday(&(perf_info->now_t), NULL); + diff = TIMEVAL_DIFF(&(perf_info->prev_t), &(perf_info->now_t)); + (perf_info->subscr_query_t) += diff; + (perf_info->subscr_query_c) ++; + } + static void monitor_provider_query(PerfMon *perf_info) { + double diff; + gettimeofday(&(perf_info->now_t), NULL); + diff = TIMEVAL_DIFF(&(perf_info->prev_t), &(perf_info->now_t)); + (perf_info->prov_query_t) += diff; + (perf_info->prov_query_c) ++; + } + static void monitor_subscriber_iud(PerfMon *perf_info) { + double diff; + 
gettimeofday(&(perf_info->now_t), NULL); + diff = TIMEVAL_DIFF(&(perf_info->prev_t), &(perf_info->now_t)); + (perf_info->subscr_iud__t) += diff; + (perf_info->subscr_iud__c) ++; + } + static void monitor_largetuples(PerfMon *perf_info) { + double diff; + gettimeofday(&(perf_info->now_t), NULL); + diff = TIMEVAL_DIFF(&(perf_info->prev_t), &(perf_info->now_t)); + (perf_info->large_tuples_t) += diff; + (perf_info->large_tuples_c) ++; }
- Previous message: [Slony1-commit] slony1-engine/src/slon slon.c
- Next message: [Slony1-commit] slony1-engine/src/slon remote_worker.c
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
More information about the Slony1-commit mailing list