Wed Aug 10 22:40:16 PDT 2005
- Previous message: [Slony1-general] query too complex after subscribing a set with big tables at busy time
- Next message: [Slony1-general] query too complex after subscribing a set with big tables at busy time
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
Hannu Krosing <hannu at skype.net> writes: > On T, 2005-08-09 at 17:07 -0400, Christopher Browne wrote: > >> I'm not sure a temp table will work, based on where the actionseq values >> come from :-(. > > If the usual case is also a continuous list of integers, I see no real > need for making it more complex. It would have been the last effort if > the integers had been sparse but still plenty; > >> Alas, the problem is that elegant Pythonic answers aren't much good for >> helping with code in C, as C hasn't got the string parsing capabilities, >> and allocating memory dynamically requires a fair bit of work "by hand." > > I _tried_ to write as C-like (and not really elegant :) ) code as > possible, so that it would be useful for actual C coding - there are > only 4 variables ( n, n1, range_start, range_end ), the list is scanned > once and the actual query parts can be added also continuously when > ready. > > And I guess that these integers come from some previous query, so they > are also kind of "pre-allocated". > >> I've got a FSM whiteboarded that will efficiently parse the list of >> integers (when you can use an FSM, your problem has "blazingly fast" >> written all over it!); I'll have to do a bit of thinking about what to >> do about processing those integers :-). >> >> I now know the FSM part is easy; I'll have to sleep on the rest... > > my python code was just for testing the processing algorithm (I got it > work right in 4 tries :) > > ... I have a patch that passes the "test_1_pgbench" test, which definitely exercises it. (Albeit not with Just Insanely Big Queries...) If the set of "actionseq" values are being returned in random order, it won't turn out terribly well, but if they are in even a fairly approximate ordering, it'll be a BIG help. And it looks as though they are returned more or less in order. I'd like for someone else to "use this patch in anger" before I consider adding it into CVS HEAD. In particular, can Jan comment on how sl_setsync gets populated? Do I get to assume a reasonably strong ordering of the elements embedded in sl_setsync.ssy_action_list? Index: remote_worker.c =================================================================== RCS file: /usr/local/cvsroot/slony1/slony1-engine/src/slon/remote_worker.c,v retrieving revision 1.88 diff -c -u -r1.88 remote_worker.c --- remote_worker.c 8 Aug 2005 15:51:18 -0000 1.88 +++ remote_worker.c 10 Aug 2005 21:23:19 -0000 @@ -255,6 +255,8 @@ static int logarchive_tracking (const char *namespace, int sub_set, const char *firstseq, const char *seqbuf); static int write_void_log (int node_id, char *seqbuf, const char *message); +static void compress_actionseq (const char *ssy_actionseq, SlonDString *action_subquery); + #define TERMINATE_QUERY_AND_ARCHIVE dstring_free(&query); terminate_log_archive(); /* @@ -3677,6 +3679,9 @@ SlonDString new_qual; SlonDString query; SlonDString *provider_qual; + SlonDString actionseq_subquery; + + int actionlist_len; gettimeofday(&tv_start, NULL); slon_log(SLON_DEBUG2, "remoteWorkerThread_%d: SYNC " INT64_FORMAT @@ -4002,12 +4007,20 @@ slon_appendquery(provider_qual, "(log_xid >= '%s')", ssy_maxxid); - if (strlen(ssy_action_list) != 0) - slon_appendquery(provider_qual, - " and log_actionseq not in (%s)\n) ", - ssy_action_list); - else + actionlist_len = strlen(ssy_action_list); + slon_log(SLON_DEBUG2, " ssy_action_list value: %s length: %d\n", + ssy_action_list, actionlist_len); + + if (actionlist_len == 0) { slon_appendquery(provider_qual, "\n) "); + } else { + dstring_init(&actionseq_subquery); + compress_actionseq(ssy_action_list, &actionseq_subquery); + slon_appendquery(provider_qual, + " and (%s)\n) ", + dstring_data(&actionseq_subquery)); + dstring_free(&actionseq_subquery); + } PQclear(res2); @@ -4963,3 +4976,181 @@ rc = close_log_archive(); return rc; } + +/* given a string consisting of a list of actionseq values, return a + string that compresses this into a set of log_actionseq ranges */ + +/* Thus, "'13455','13456','13457','13458','13459','13460','13462'" + compresses into... + + log_actionseq not between '13455' and '13460' and + log_actionseq <> '13462' +*/ + +#define START_STATE 1 +#define COLLECTING_DIGITS 2 +#define BETWEEN_NUMBERS 3 +#define DONE 4 + +void compress_actionseq (const char *ssy_actionlist, SlonDString *action_subquery) { + int state; + int curr_number, curr_min, curr_max; + int curr_digit; + int first_subquery; + char curr_char; + first_subquery = 1; + state = START_STATE; + slon_mkquery(action_subquery, " "); + + while (state != DONE) { + curr_char = *ssy_actionlist; + switch (curr_char) { + case '\0': + state = DONE; + break; + case '0': + curr_digit = 0; + if (state == COLLECTING_DIGITS) { + curr_number = curr_number * 10 + curr_digit; + } else { + state = COLLECTING_DIGITS; + curr_number = curr_digit; + } + break; + case '1': + curr_digit = 1; + if (state == COLLECTING_DIGITS) { + curr_number = curr_number * 10 + curr_digit; + } else { + state = COLLECTING_DIGITS; + curr_number = curr_digit; + } + break; + case '2': + curr_digit = 2; + if (state == COLLECTING_DIGITS) { + curr_number = curr_number * 10 + curr_digit; + } else { + state = COLLECTING_DIGITS; + curr_number = curr_digit; + } + break; + case '3': + curr_digit = 3; + if (state == COLLECTING_DIGITS) { + curr_number = curr_number * 10 + curr_digit; + } else { + state = COLLECTING_DIGITS; + curr_number = curr_digit; + } + break; + case '4': + curr_digit = 4; + if (state == COLLECTING_DIGITS) { + curr_number = curr_number * 10 + curr_digit; + } else { + state = COLLECTING_DIGITS; + curr_number = curr_digit; + } + break; + case '5': + curr_digit = 5; + if (state == COLLECTING_DIGITS) { + curr_number = curr_number * 10 + curr_digit; + } else { + state = COLLECTING_DIGITS; + curr_number = curr_digit; + } + break; + case '6': + curr_digit = 6; + if (state == COLLECTING_DIGITS) { + curr_number = curr_number * 10 + curr_digit; + } else { + state = COLLECTING_DIGITS; + curr_number = curr_digit; + } + break; + case '7': + curr_digit = 7; + if (state == COLLECTING_DIGITS) { + curr_number = curr_number * 10 + curr_digit; + } else { + state = COLLECTING_DIGITS; + curr_number = curr_digit; + } + break; + case '8': + curr_digit = 8; + if (state == COLLECTING_DIGITS) { + curr_number = curr_number * 10 + curr_digit; + } else { + state = COLLECTING_DIGITS; + curr_number = curr_digit; + } + break; + case '9': + curr_digit = 9; + if (state == COLLECTING_DIGITS) { + curr_number = curr_number * 10 + curr_digit; + } else { + state = COLLECTING_DIGITS; + curr_number = curr_digit; + } + break; + case '\'': + case ',': + if (state == COLLECTING_DIGITS) { + /* Finished another number... Fold it into the ranges... */ +/* slon_log(SLON_DEBUG2, "Finished number: %d\n", curr_number); */ + /* If we haven't a range, then the range is the current + number */ + if ((curr_min == 0) || (curr_max == 0)) { + curr_min = curr_number; + curr_max = curr_number; + } + /* If the number pushes the range outwards by 1, + then shift the range by 1... */ + if (curr_number == curr_min - 1) { + curr_min --; + } + if (curr_number == curr_max + 1) { + curr_max ++; + } + + /* If the number is inside the range, do nothing */ + if ((curr_number >= curr_min) && (curr_number <= curr_max)) { + /* Do nothing - inside the range */ + } + + /* If the number is outside the range, then + generate a subquery based on the range, and + have the new number become the range */ + if ((curr_number < curr_min - 1) || (curr_number > curr_max + 1)) { + if (first_subquery) { + first_subquery = 0; + } else { + slon_appendquery(action_subquery, " and "); + } + if (curr_max == curr_min) { +/* slon_log(SLON_DEBUG2, "simple entry - %d\n", curr_max); */ + slon_appendquery(action_subquery, + " log_actionseq <> '%d' ", curr_max); + } else { +/* slon_log(SLON_DEBUG2, "between entry - %d %d\n", */ +/* curr_min, curr_max); */ + slon_appendquery(action_subquery, + " log_actionseq not between '%d' and '%d' ", + curr_min, curr_max); + } + curr_min = curr_number; + curr_max = curr_number; + } + } + state = BETWEEN_NUMBERS; + curr_number = 0; + } + ssy_actionlist++; + } + slon_log(SLON_DEBUG2, " compressed actionseq subquery... %s\n", dstring_data(action_subquery)); +} -- "cbbrowne","@","ca.afilias.info" <http://dev6.int.libertyrms.com/> Christopher Browne (416) 673-4124 (land)
- Previous message: [Slony1-general] query too complex after subscribing a set with big tables at busy time
- Next message: [Slony1-general] query too complex after subscribing a set with big tables at busy time
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
More information about the Slony1-general mailing list