Drizzled Public API Documentation

row0purge.cc
1 /*****************************************************************************
2 
3 Copyright (C) 1997, 2010, Innobase Oy. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
8 
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12 
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
15 St, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 *****************************************************************************/
18 
19 /**************************************************/
26 #include "row0purge.h"
27 
28 #ifdef UNIV_NONINL
29 #include "row0purge.ic"
30 #endif
31 
32 #include "fsp0fsp.h"
33 #include "mach0data.h"
34 #include "trx0rseg.h"
35 #include "trx0trx.h"
36 #include "trx0roll.h"
37 #include "trx0undo.h"
38 #include "trx0purge.h"
39 #include "trx0rec.h"
40 #include "que0que.h"
41 #include "row0row.h"
42 #include "row0upd.h"
43 #include "row0vers.h"
44 #include "row0mysql.h"
45 #include "log0log.h"
46 
47 /*************************************************************************
48 IMPORTANT NOTE: Any operation that generates redo MUST check that there
49 is enough space in the redo log before that operation starts. This is
50 done by calling log_free_check(). The reason for checking the
51 availability of the redo log space before the start of the operation is
52 that we MUST not hold any synchronization objects when performing the
53 check.
54 If you make a change in this module make sure that no codepath is
55 introduced where a call to log_free_check() is bypassed. */
56 
57 /*************************************************************************
58 IMPORTANT NOTE: Any operation that generates redo MUST check that there
59 is enough space in the redo log before that operation starts. This is
60 done by calling log_free_check(). The reason for checking the
61 availability of the redo log space before the start of the operation is
62 that we MUST not hold any synchronization objects when performing the
63 check.
64 If you make a change in this module make sure that no codepath is
65 introduced where a call to log_free_check() is bypassed. */
66 
67 /********************************************************************/
/* Allocates a purge node from the caller-supplied heap, marks it as a
QUE_NODE_PURGE query graph node, links it to its parent and gives it a
private heap created with mem_heap_create(256). Only common.type,
common.parent and heap are assigned here; the remaining fields of the node
are left exactly as mem_heap_alloc() returned them.
NOTE(review): the embedded listing numbers skip from 70 to 73, so the lines
carrying the return type and the function name appear to have been lost in
extraction -- restore them from the original source before compiling.
@return the newly allocated purge node */
70 UNIV_INTERN
73 /*==================*/
74  que_thr_t* parent, /* in: parent node; stored in node->common.parent */
75  mem_heap_t* heap) /* in: heap from which the node itself is allocated */
76 {
77  purge_node_t* node;
78 
79  ut_ad(parent && heap);
80 
81  node = static_cast<purge_node_t *>(mem_heap_alloc(heap, sizeof(purge_node_t)));
82 
83  node->common.type = QUE_NODE_PURGE;
84  node->common.parent = parent;
85 
/* The node gets its own heap, separate from the heap it was allocated
from. */
86  node->heap = mem_heap_create(256);
87 
88  return(node);
89 }
90 
91 /***********************************************************/
95 static
96 ibool
97 row_purge_reposition_pcur(
98 /*======================*/
99  ulint mode,
100  purge_node_t* node,
101  mtr_t* mtr)
102 {
103  ibool found;
104 
105  if (node->found_clust) {
106  found = btr_pcur_restore_position(mode, &(node->pcur), mtr);
107 
108  return(found);
109  }
110 
111  found = row_search_on_row_ref(&(node->pcur), mode, node->table,
112  node->ref, mtr);
113  node->found_clust = found;
114 
115  if (found) {
116  btr_pcur_store_position(&(node->pcur), mtr);
117  }
118 
119  return(found);
120 }
121 
122 /***********************************************************/
126 static
127 ibool
128 row_purge_remove_clust_if_poss_low(
129 /*===============================*/
130  purge_node_t* node,
131  ulint mode)
132 {
133  dict_index_t* index;
134  btr_pcur_t* pcur;
135  btr_cur_t* btr_cur;
136  ibool success;
137  ulint err;
138  mtr_t mtr;
139  rec_t* rec;
140  mem_heap_t* heap = NULL;
141  ulint offsets_[REC_OFFS_NORMAL_SIZE];
142  rec_offs_init(offsets_);
143 
144  index = dict_table_get_first_index(node->table);
145 
146  pcur = &(node->pcur);
147  btr_cur = btr_pcur_get_btr_cur(pcur);
148 
149  log_free_check();
150  mtr_start(&mtr);
151 
152  success = row_purge_reposition_pcur(mode, node, &mtr);
153 
154  if (!success) {
155  /* The record is already removed */
156 
157  btr_pcur_commit_specify_mtr(pcur, &mtr);
158 
159  return(TRUE);
160  }
161 
162  rec = btr_pcur_get_rec(pcur);
163 
164  if (node->roll_ptr != row_get_rec_roll_ptr(
165  rec, index, rec_get_offsets(rec, index, offsets_,
166  ULINT_UNDEFINED, &heap))) {
167  if (UNIV_LIKELY_NULL(heap)) {
168  mem_heap_free(heap);
169  }
170  /* Someone else has modified the record later: do not remove */
171  btr_pcur_commit_specify_mtr(pcur, &mtr);
172 
173  return(TRUE);
174  }
175 
176  if (UNIV_LIKELY_NULL(heap)) {
177  mem_heap_free(heap);
178  }
179 
180  if (mode == BTR_MODIFY_LEAF) {
181  success = btr_cur_optimistic_delete(btr_cur, &mtr);
182  } else {
183  ut_ad(mode == BTR_MODIFY_TREE);
184  btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
185  RB_NONE, &mtr);
186 
187  if (err == DB_SUCCESS) {
188  success = TRUE;
189  } else if (err == DB_OUT_OF_FILE_SPACE) {
190  success = FALSE;
191  } else {
192  ut_error;
193  }
194  }
195 
196  btr_pcur_commit_specify_mtr(pcur, &mtr);
197 
198  return(success);
199 }
200 
201 /***********************************************************/
/* Removes the clustered index record referred to by the purge node, if
possible. An optimistic delete (BTR_MODIFY_LEAF) is tried first; if it does
not succeed, a pessimistic delete (BTR_MODIFY_TREE) is attempted and retried
up to BTR_CUR_RETRY_DELETE_N_TIMES times. The final outcome is asserted with
ut_a(), so a persistent failure stops the server. */
204 static
205 void
206 row_purge_remove_clust_if_poss(
207 /*===========================*/
208  purge_node_t* node) /* in: row purge node */
209 {
210  ibool success;
211  ulint n_tries = 0;
212 
213  /* fputs("Purge: Removing clustered record\n", stderr); */
214 
215  success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF);
216  if (success) {
217 
218  return;
219  }
220 retry:
221  success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_TREE);
222  /* The delete operation may fail if we have little
223  file space left: TODO: easiest to crash the database
224  and restart with more file space */
225 
226  if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
227  n_tries++;
228 
/* NOTE(review): listing line 229 is missing here (the embedded numbering
skips from 228 to 230); presumably a pause before the retry -- confirm
against the original source. */
230 
231  goto retry;
232  }
233 
234  ut_a(success);
235 }
236 
237 /***********************************************************/
/* Decides whether a secondary index entry can be deleted by purge. The
visible code starts a mini-transaction, repositions node->pcur on the
clustered index record with BTR_SEARCH_LEAF, and commits the
mini-transaction through the persistent cursor before returning.
NOTE(review): this listing is incomplete. The function-name line (254) is
missing; callers in this file invoke row_purge_poss_sec(node, index, entry),
which matches this parameter list. Line 267 is missing as well, so the
expression assigned to can_delete is broken: the call that receives
(btr_pcur_get_rec(&node->pcur), &mtr, index, entry) and the operator joining
it to the reposition result are not visible -- restore them from the
original source.
@return the value computed into can_delete */
252 UNIV_INTERN
253 ibool
255 /*===============*/
256  purge_node_t* node, /* in/out: row purge node */
257  dict_index_t* index, /* in: index; asserted not to be the clustered index */
258  const dtuple_t* entry) /* in: index entry */
259 {
260  ibool can_delete;
261  mtr_t mtr;
262 
263  ut_ad(!dict_index_is_clust(index));
264  mtr_start(&mtr);
265 
266  can_delete = !row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, &mtr)
268  btr_pcur_get_rec(&node->pcur),
269  &mtr, index, entry);
270 
271  btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
272 
273  return(can_delete);
274 }
275 
276 /***************************************************************
277 Removes a secondary index entry if possible, by modifying the
278 index tree. Does not try to buffer the delete.
279 @return TRUE if success or if not found */
280 static
281 ibool
282 row_purge_remove_sec_if_poss_tree(
283 /*==============================*/
284  purge_node_t* node, /* in: row purge node */
285  dict_index_t* index, /* in: secondary index to search */
286  const dtuple_t* entry) /* in: index entry to look up and remove */
287 {
288  btr_pcur_t pcur;
289  btr_cur_t* btr_cur;
290  ibool success = TRUE;
291  ulint err;
292  mtr_t mtr;
293  enum row_search_result search_result;
294 
/* Check redo log space before the mini-transaction takes any latches. */
295  log_free_check();
296  mtr_start(&mtr);
297 
298  search_result = row_search_index_entry(index, entry, BTR_MODIFY_TREE,
299  &pcur, &mtr);
300 
301  switch (search_result) {
302  case ROW_NOT_FOUND:
303  /* Not found. This is a legitimate condition. In a
304  rollback, InnoDB will remove secondary recs that would
305  be purged anyway. Then the actual purge will not find
306  the secondary index record. Also, the purge itself is
307  eager: if it comes to consider a secondary index
308  record, and notices it does not need to exist in the
309  index, it will remove it. Then if/when the purge
310  comes to consider the secondary index record a second
311  time, it will not exist any more in the index. */
312 
313  /* fputs("PURGE:........sec entry not found\n", stderr); */
314  /* dtuple_print(stderr, entry); */
315  goto func_exit;
316  case ROW_FOUND:
317  break;
318  case ROW_BUFFERED:
319  case ROW_NOT_DELETED_REF:
320  /* These are invalid outcomes, because the mode passed
321  to row_search_index_entry() did not include any of the
322  flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
323  ut_error;
324  }
325 
326  btr_cur = btr_pcur_get_btr_cur(&pcur);
327 
328  /* We should remove the index record if no later version of the row,
329  which cannot be purged yet, requires its existence. If some requires,
330  we should do nothing. */
331 
332  if (row_purge_poss_sec(node, index, entry)) {
333  /* Remove the index record, which should have been
334  marked for deletion. */
/* NOTE(review): listing line 336 is missing, so the assertion below is
incomplete as shown (the operand between REC_INFO_DELETED_FLAG and
dict_table_is_comp() is not visible) -- restore it from the original
source. */
335  ut_ad(REC_INFO_DELETED_FLAG
337  dict_table_is_comp(index->table)));
338 
339  btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
340  RB_NONE, &mtr);
/* Running out of file space is the only failure reported back to the
caller; any other error is treated as fatal. */
341  switch (UNIV_EXPECT(err, DB_SUCCESS)) {
342  case DB_SUCCESS:
343  break;
344  case DB_OUT_OF_FILE_SPACE:
345  success = FALSE;
346  break;
347  default:
348  ut_error;
349  }
350  }
351 
352 func_exit:
353  btr_pcur_close(&pcur);
354  mtr_commit(&mtr);
355 
356  return(success);
357 }
358 
359 /***************************************************************
360 Removes a secondary index entry without modifying the index tree,
361 if possible.
362 @return TRUE if success or if not found */
363 static
364 ibool
365 row_purge_remove_sec_if_poss_leaf(
366 /*==============================*/
367  purge_node_t* node, /* in: row purge node */
368  dict_index_t* index, /* in: secondary index to search */
369  const dtuple_t* entry) /* in: index entry to look up and remove */
370 {
371  mtr_t mtr;
372  btr_pcur_t pcur;
373  enum row_search_result search_result;
374 
375  log_free_check();
376 
377  mtr_start(&mtr);
378 
379  /* Set the purge node for the call to row_purge_poss_sec(). */
380  pcur.btr_cur.purge_node = node;
381  /* Set the query thread, so that ibuf_insert_low() will be
382  able to invoke thd_get_trx(). */
383  pcur.btr_cur.thr = static_cast<que_thr_t *>(que_node_get_parent(node));
384 
385  search_result = row_search_index_entry(
386  index, entry, BTR_MODIFY_LEAF | BTR_DELETE, &pcur, &mtr);
387 
/* Every case label below ends at func_exit, which closes the cursor,
commits the mini-transaction and returns; success is declared inside the
switch so that all of these paths share it. */
388  switch (search_result) {
389  ibool success;
390  case ROW_FOUND:
391  /* Before attempting to purge a record, check
392  if it is safe to do so. */
393  if (row_purge_poss_sec(node, index, entry)) {
394  btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);
395 
396  /* Only delete-marked records should be purged. */
/* NOTE(review): listing line 398 is missing, so the assertion below is
incomplete as shown (the call that takes btr_cur_get_rec(btr_cur) and
dict_table_is_comp() as arguments is not visible) -- restore it from the
original source. */
397  ut_ad(REC_INFO_DELETED_FLAG
399  btr_cur_get_rec(btr_cur),
400  dict_table_is_comp(index->table)));
401 
402  if (!btr_cur_optimistic_delete(btr_cur, &mtr)) {
403 
404  /* The index entry could not be deleted. */
405  success = FALSE;
406  goto func_exit;
407  }
408  }
409  /* fall through (the index entry is still needed,
410  or the deletion succeeded) */
411  case ROW_NOT_DELETED_REF:
412  /* The index entry is still needed. */
413  case ROW_BUFFERED:
414  /* The deletion was buffered. */
415  case ROW_NOT_FOUND:
416  /* The index entry does not exist, nothing to do. */
417  success = TRUE;
418  func_exit:
419  btr_pcur_close(&pcur);
420  mtr_commit(&mtr);
421  return(success);
422  }
423 
/* Only reached if search_result matched none of the case labels above. */
424  ut_error;
425  return(FALSE);
426 }
427 
428 /***********************************************************/
/* Removes a secondary index entry if possible. The leaf-only variant
(row_purge_remove_sec_if_poss_leaf) is tried first; if it returns FALSE, the
tree-modifying variant (row_purge_remove_sec_if_poss_tree) is attempted and
retried up to BTR_CUR_RETRY_DELETE_N_TIMES times. The final outcome is
asserted with ut_a(). */
430 UNIV_INLINE
431 void
432 row_purge_remove_sec_if_poss(
433 /*=========================*/
434  purge_node_t* node, /* in: row purge node */
435  dict_index_t* index, /* in: secondary index */
436  dtuple_t* entry) /* in: index entry to remove */
437 {
438  ibool success;
439  ulint n_tries = 0;
440 
441  /* fputs("Purge: Removing secondary record\n", stderr); */
442 
443  if (row_purge_remove_sec_if_poss_leaf(node, index, entry)) {
444 
445  return;
446  }
447 retry:
448  success = row_purge_remove_sec_if_poss_tree(node, index, entry);
449  /* The delete operation may fail if we have little
450  file space left: TODO: easiest to crash the database
451  and restart with more file space */
452 
453  if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
454 
455  n_tries++;
456 
/* NOTE(review): listing line 457 is missing here (the embedded numbering
skips from 456 to 458); presumably a pause before the retry -- confirm
against the original source. */
458 
459  goto retry;
460  }
461 
462  ut_a(success);
463 }
464 
465 /***********************************************************/
467 static
468 void
469 row_purge_del_mark(
470 /*===============*/
471  purge_node_t* node)
472 {
473  mem_heap_t* heap;
474  dtuple_t* entry;
475  dict_index_t* index;
476 
477  ut_ad(node);
478 
479  heap = mem_heap_create(1024);
480 
481  while (node->index != NULL) {
482  index = node->index;
483 
484  /* Build the index entry */
485  entry = row_build_index_entry(node->row, NULL, index, heap);
486  ut_a(entry);
487  row_purge_remove_sec_if_poss(node, index, entry);
488 
489  node->index = dict_table_get_next_index(node->index);
490  }
491 
492  mem_heap_free(heap);
493 
494  row_purge_remove_clust_if_poss(node);
495 }
496 
497 /***********************************************************/
/* Purges an update of an existing record, and/or frees externally stored
fields referenced by the update vector. Unless node->rec_type is
TRX_UNDO_UPD_DEL_REC, the function first walks the indexes from node->index
onwards and, for each index whose ordering fields the update changes,
builds the old index entry from node->row and removes it if possible. It
then visits every field of node->update and, for fields flagged as
externally stored, locates the field data on the undo log page addressed by
node->roll_ptr and calls btr_free_externally_stored_field() on it. */
500 static
501 void
502 row_purge_upd_exist_or_extern(
503 /*==========================*/
504  purge_node_t* node) /* in/out: row purge node */
505 {
506  mem_heap_t* heap;
507  dtuple_t* entry;
508  dict_index_t* index;
509  ibool is_insert;
510  ulint rseg_id;
511  ulint page_no;
512  ulint offset;
513  ulint i;
514  mtr_t mtr;
515 
516  ut_ad(node);
517 
518  if (node->rec_type == TRX_UNDO_UPD_DEL_REC) {
519 
/* heap is never created on this path; it is not used after the label. */
520  goto skip_secondaries;
521  }
522 
523  heap = mem_heap_create(1024);
524 
525  while (node->index != NULL) {
526  index = node->index;
527 
528  if (row_upd_changes_ord_field_binary(NULL, node->index,
529  node->update)) {
530  /* Build the older version of the index entry */
531  entry = row_build_index_entry(node->row, NULL,
532  index, heap);
533  ut_a(entry);
534  row_purge_remove_sec_if_poss(node, index, entry);
535  }
536 
537  node->index = dict_table_get_next_index(node->index);
538  }
539 
540  mem_heap_free(heap);
541 
542 skip_secondaries:
543  /* Free possible externally stored fields */
544  for (i = 0; i < upd_get_n_fields(node->update); i++) {
545 
546  const upd_field_t* ufield
547  = upd_get_nth_field(node->update, i);
548 
549  if (dfield_is_ext(&ufield->new_val)) {
550  buf_block_t* block;
551  ulint internal_offset;
552  byte* data_field;
553 
554  /* We use the fact that new_val points to
555  node->undo_rec and get thus the offset of
556  dfield data inside the undo record. Then we
557  can calculate from node->roll_ptr the file
558  address of the new_val data */
559 
560  internal_offset
561  = ((const byte*)
562  dfield_get_data(&ufield->new_val))
563  - node->undo_rec;
564 
565  ut_a(internal_offset < UNIV_PAGE_SIZE);
566 
567  trx_undo_decode_roll_ptr(node->roll_ptr,
568  &is_insert, &rseg_id,
569  &page_no, &offset);
/* Each externally stored field is freed in its own mini-transaction. */
570  mtr_start(&mtr);
571 
572  /* We have to acquire an X-latch to the clustered
573  index tree */
574 
575  index = dict_table_get_first_index(node->table);
576 
577  mtr_x_lock(dict_index_get_lock(index), &mtr);
578 
579  /* NOTE: we must also acquire an X-latch to the
580  root page of the tree. We will need it when we
581  free pages from the tree. If the tree is of height 1,
582  the tree X-latch does NOT protect the root page,
583  because it is also a leaf page. Since we will have a
584  latch on an undo log page, we would break the
585  latching order if we would only later latch the
586  root page of such a tree! */
587 
588  btr_root_get(index, &mtr);
589 
590  /* We assume in purge of externally stored fields
591  that the space id of the undo log record is 0! */
592 
593  block = buf_page_get(0, 0, page_no, RW_X_LATCH, &mtr);
594  buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
595 
596  data_field = buf_block_get_frame(block)
597  + offset + internal_offset;
598 
/* NOTE(review): listing lines 600 and 604 are missing, so the ut_a()
assertion below is not closed and the second argument of
btr_free_externally_stored_field() is cut off after
"dfield_get_len(&ufield->new_val)" -- restore both lines from the original
source. */
599  ut_a(dfield_get_len(&ufield->new_val)
601  btr_free_externally_stored_field(
602  index,
603  data_field + dfield_get_len(&ufield->new_val)
605  NULL, NULL, NULL, 0, RB_NONE, &mtr);
606  mtr_commit(&mtr);
607  }
608  }
609 }
610 
611 /***********************************************************/
/* Parses node->undo_rec and fills in the purge node fields needed to purge
the row: rec_type, table, ref, update and (when ordering fields may have
changed) row.
Returns FALSE without touching the dictionary when the record type is
TRX_UNDO_UPD_DEL_REC with no externally stored update, or when it is
TRX_UNDO_UPD_EXIST_REC with UPD_NODE_NO_ORD_CHANGE set and no externally
stored update. Otherwise row_mysql_freeze_data_dictionary() is called and
the table is looked up by id; FALSE is returned if the table is not found,
its .ibd file is missing, or it has no clustered index.
NOTE(review): on the TRUE path the data dictionary is still frozen when the
function returns; nothing in this function releases it.
@return TRUE if the caller should go on to purge the row */
615 static
616 ibool
617 row_purge_parse_undo_rec(
618 /*=====================*/
619  purge_node_t* node, /* in/out: row purge node; undo_rec is the input */
620  ibool* updated_extern, /* out: filled in by trx_undo_rec_get_pars() */
623  que_thr_t* thr) /* in: query thread, used to obtain the transaction */
624 {
625  dict_index_t* clust_index;
626  byte* ptr;
627  trx_t* trx;
628  undo_no_t undo_no;
629  table_id_t table_id;
630  trx_id_t trx_id;
631  roll_ptr_t roll_ptr;
632  ulint info_bits;
633  ulint type;
634  ulint cmpl_info;
635 
636  ut_ad(node && thr);
637 
638  trx = thr_get_trx(thr);
639 
640  ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
641  updated_extern, &undo_no, &table_id);
642  node->rec_type = type;
643 
644  if (type == TRX_UNDO_UPD_DEL_REC && !(*updated_extern)) {
645 
646  return(FALSE);
647  }
648 
649  ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
650  &info_bits);
651  node->table = NULL;
652 
653  if (type == TRX_UNDO_UPD_EXIST_REC
654  && cmpl_info & UPD_NODE_NO_ORD_CHANGE && !(*updated_extern)) {
655 
656  /* Purge requires no changes to indexes: we may return */
657 
658  return(FALSE);
659  }
660 
661  /* Prevent DROP TABLE etc. from running when we are doing the purge
662  for this row */
663 
664  row_mysql_freeze_data_dictionary(trx);
665 
666  mutex_enter(&(dict_sys->mutex));
667 
668  node->table = dict_table_get_on_id_low(table_id);
669 
670  mutex_exit(&(dict_sys->mutex));
671 
672  if (node->table == NULL) {
673  /* The table has been dropped: no need to do purge */
674 err_exit:
/* NOTE(review): listing line 675 is missing here (the embedded numbering
skips from 674 to 676). As shown, this path returns FALSE while the data
dictionary is still frozen; presumably the missing line releases it --
confirm against the original source. */
676  return(FALSE);
677  }
678 
679  if (node->table->ibd_file_missing) {
680  /* We skip purge of missing .ibd files */
681 
682  node->table = NULL;
683 
684  goto err_exit;
685  }
686 
687  clust_index = dict_table_get_first_index(node->table);
688 
689  if (clust_index == NULL) {
690  /* The table was corrupt in the data dictionary */
691 
692  goto err_exit;
693  }
694 
695  ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
696  node->heap);
697 
698  ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
699  roll_ptr, info_bits, trx,
700  node->heap, &(node->update));
701 
702  /* Read to the partial row the fields that occur in indexes */
703 
704  if (!(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
/* NOTE(review): listing line 705 is missing: the call that takes the
argument list below (and presumably fills node->row) is not visible --
restore it from the original source. */
706  ptr, clust_index, &node->row,
707  type == TRX_UNDO_UPD_DEL_REC,
708  node->heap);
709  }
710 
711  return(TRUE);
712 }
713 
714 /***********************************************************/
719 static
720 ulint
721 row_purge(
722 /*======*/
723  purge_node_t* node,
724  que_thr_t* thr)
725 {
726  roll_ptr_t roll_ptr;
727  ibool purge_needed;
728  ibool updated_extern;
729  trx_t* trx;
730 
731  ut_ad(node && thr);
732 
733  trx = thr_get_trx(thr);
734 
735  node->undo_rec = trx_purge_fetch_next_rec(&roll_ptr,
736  &(node->reservation),
737  node->heap);
738  if (!node->undo_rec) {
739  /* Purge completed for this query thread */
740 
741  thr->run_node = que_node_get_parent(node);
742 
743  return(DB_SUCCESS);
744  }
745 
746  node->roll_ptr = roll_ptr;
747 
748  if (node->undo_rec == &trx_purge_dummy_rec) {
749  purge_needed = FALSE;
750  } else {
751  purge_needed = row_purge_parse_undo_rec(node, &updated_extern,
752  thr);
753  /* If purge_needed == TRUE, we must also remember to unfreeze
754  data dictionary! */
755  }
756 
757  if (purge_needed) {
758  node->found_clust = FALSE;
759 
760  node->index = dict_table_get_next_index(
761  dict_table_get_first_index(node->table));
762 
763  if (node->rec_type == TRX_UNDO_DEL_MARK_REC) {
764  row_purge_del_mark(node);
765 
766  } else if (updated_extern
767  || node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
768 
769  row_purge_upd_exist_or_extern(node);
770  }
771 
772  if (node->found_clust) {
773  btr_pcur_close(&(node->pcur));
774  }
775 
777  }
778 
779  /* Do some cleanup */
780  trx_purge_rec_release(node->reservation);
781  mem_heap_empty(node->heap);
782 
783  thr->run_node = node;
784 
785  return(DB_SUCCESS);
786 }
787 
788 /***********************************************************/
/* Query graph step for a purge node: takes the node from thr->run_node,
checks in debug builds that it is a QUE_NODE_PURGE node, runs row_purge()
on it once and returns the query thread. The return value of row_purge() is
only captured and checked when UNIV_DEBUG is defined.
NOTE(review): the embedded listing numbers skip from 793 to 795, so the
line carrying the function name appears to have been lost in extraction --
restore it from the original source before compiling.
@return the query thread passed in */
792 UNIV_INTERN
793 que_thr_t*
795 /*===========*/
796  que_thr_t* thr) /* in: query thread whose run_node is the purge node */
797 {
798  purge_node_t* node;
799 #ifdef UNIV_DEBUG
800  ulint err;
801 #endif /* UNIV_DEBUG */
802 
803  ut_ad(thr);
804 
805  node = static_cast<purge_node_t *>(thr->run_node);
806 
807  ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
808 
/* With UNIV_DEBUG the next three lines read "err = row_purge(node, thr);";
without it they reduce to a plain call whose result is discarded. */
809 #ifdef UNIV_DEBUG
810  err =
811 #endif /* UNIV_DEBUG */
812  row_purge(node, thr);
813 
814 #ifdef UNIV_DEBUG
815  ut_a(err == DB_SUCCESS);
816 #endif /* UNIV_DEBUG */
817 
818  return(thr);
819 }