Drizzled Public API Documentation

ibuf0ibuf.cc
1 /*****************************************************************************
2 
3 Copyright (C) 1997, 2010, Innobase Oy. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
8 
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12 
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
15 St, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 *****************************************************************************/
18 
19 /**************************************************/
26 #include "ibuf0ibuf.h"
27 
29 #define IBUF_BITS_PER_PAGE 4
30 #if IBUF_BITS_PER_PAGE % 2
31 # error "IBUF_BITS_PER_PAGE must be an even number!"
32 #endif
33 
34 #define IBUF_BITMAP PAGE_DATA
35 
36 #ifdef UNIV_NONINL
37 #include "ibuf0ibuf.ic"
38 #endif
39 
40 #ifndef UNIV_HOTBACKUP
41 
42 #include "buf0buf.h"
43 #include "buf0rea.h"
44 #include "fsp0fsp.h"
45 #include "trx0sys.h"
46 #include "fil0fil.h"
47 #include "thr0loc.h"
48 #include "rem0rec.h"
49 #include "btr0cur.h"
50 #include "btr0pcur.h"
51 #include "btr0btr.h"
52 #include "row0upd.h"
53 #include "sync0sync.h"
54 #include "dict0boot.h"
55 #include "fut0lst.h"
56 #include "lock0lock.h"
57 #include "log0recv.h"
58 #include "que0que.h"
59 #include "srv0start.h" /* srv_shutdown_state */
60 
61 /* STRUCTURE OF AN INSERT BUFFER RECORD
62 
63 In versions < 4.1.x:
64 
65 1. The first field is the page number.
66 2. The second field is an array which stores type info for each subsequent
67  field. We store the information which affects the ordering of records, and
68  also the physical storage size of an SQL NULL value. E.g., for CHAR(10) it
69  is 10 bytes.
70 3. Next we have the fields of the actual index record.
71 
72 In versions >= 4.1.x:
73 
74 Note that contrary to what we planned in the 1990's, there will only be one
75 insert buffer tree, and that is in the system tablespace of InnoDB.
76 
77 1. The first field is the space id.
78 2. The second field is a one-byte marker (0) which differentiates records from
79  the < 4.1.x storage format.
80 3. The third field is the page number.
81 4. The fourth field contains the type info, where we have also added 2 bytes to
82  store the charset. In the compressed table format of 5.0.x we must add more
83  information here so that we can build a dummy 'index' struct which 5.0.x
84  can use in the binary search on the index page in the ibuf merge phase.
85 5. The rest of the fields contain the fields of the actual index record.
86 
87 In versions >= 5.0.3:
88 
89 The first byte of the fourth field is an additional marker (0) if the record
90 is in the compact format. The presence of this marker can be detected by
91 looking at the length of the field modulo DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE.
92 
93 The high-order bit of the character set field in the type info is the
94 "nullable" flag for the field.
95 
96 In versions >= 5.5:
97 
98 The optional marker byte at the start of the fourth field is replaced by
99 mandatory 3 fields, totaling 4 bytes:
100 
101  1. 2 bytes: Counter field, used to sort records within a (space id, page
102  no) in the order they were added. This is needed so that for example the
103  sequence of operations "INSERT x, DEL MARK x, INSERT x" is handled
104  correctly.
105 
106  2. 1 byte: Operation type (see ibuf_op_t).
107 
108  3. 1 byte: Flags. Currently only one flag exists, IBUF_REC_COMPACT.
109 
110 To ensure older records, which do not have counters to enforce correct
111 sorting, are merged before any new records, ibuf_insert checks if we're
112 trying to insert to a position that contains old-style records, and if so,
113 refuses the insert. Thus, ibuf pages are gradually converted to the new
114 format as their corresponding buffer pool pages are read into memory.
115 */
116 
117 
118 /* PREVENTING DEADLOCKS IN THE INSERT BUFFER SYSTEM
119 
120 If an OS thread performs any operation that brings in disk pages from
121 non-system tablespaces into the buffer pool, or creates such a page there,
122 then the operation may have as a side effect an insert buffer index tree
123 compression. Thus, the tree latch of the insert buffer tree may be acquired
124 in the x-mode, and also the file space latch of the system tablespace may
125 be acquired in the x-mode.
126 
127 Also, an insert to an index in a non-system tablespace can have the same
128 effect. How do we know this cannot lead to a deadlock of OS threads? There
129 is a problem with the i/o-handler threads: they break the latching order
130 because they own x-latches to pages which are on a lower level than the
131 insert buffer tree latch, its page latches, and the tablespace latch an
132 insert buffer operation can reserve.
133 
134 The solution is the following: Let all the tree and page latches connected
135 with the insert buffer be later in the latching order than the fsp latch and
136 fsp page latches.
137 
138 Insert buffer pages must be such that the insert buffer is never invoked
139 when these pages are accessed as this would result in a recursion violating
140 the latching order. We let a special i/o-handler thread take care of i/o to
141 the insert buffer pages and the ibuf bitmap pages, as well as the fsp bitmap
142 pages and the first inode page, which contains the inode of the ibuf tree: let
143 us call all these ibuf pages. To prevent deadlocks, we do not let a read-ahead
144 access both non-ibuf and ibuf pages.
145 
146 Then an i/o-handler for the insert buffer never needs to access recursively the
147 insert buffer tree and thus obeys the latching order. On the other hand, other
148 i/o-handlers for other tablespaces may require access to the insert buffer,
149 but because all kinds of latches they need to access there are later in the
150 latching order, no violation of the latching order occurs in this case,
151 either.
152 
153 A problem is how to grow and contract an insert buffer tree. As it is later
154 in the latching order than the fsp management, we have to reserve the fsp
155 latch first, before adding or removing pages from the insert buffer tree.
156 We let the insert buffer tree have its own file space management: a free
157 list of pages linked to the tree root. To prevent recursive using of the
158 insert buffer when adding pages to the tree, we must first load these pages
159 to memory, obtaining a latch on them, and only after that add them to the
160 free list of the insert buffer tree. More difficult is removing of pages
161 from the free list. If there is an excess of pages in the free list of the
162 ibuf tree, they might be needed if some thread reserves the fsp latch,
163 intending to allocate more file space. So we do the following: if a thread
164 reserves the fsp latch, we check the writer count field of the latch. If
165 this field has value 1, it means that the thread did not own the latch
166 before entering the fsp system, and the mtr of the thread contains no
167 modifications to the fsp pages. Now we are free to reserve the ibuf latch,
168 and check if there is an excess of pages in the free list. We can then, in a
169 separate mini-transaction, take them out of the free list and free them to
170 the fsp system.
171 
172 To avoid deadlocks in the ibuf system, we divide file pages into three levels:
173 
174 (1) non-ibuf pages,
175 (2) ibuf tree pages and the pages in the ibuf tree free list, and
176 (3) ibuf bitmap pages.
177 
178 No OS thread is allowed to access higher level pages if it has latches to
179 lower level pages; even if the thread owns a B-tree latch it must not access
180 the B-tree non-leaf pages if it has latches on lower level pages. Read-ahead
181 is only allowed for level 1 and 2 pages. Dedicated i/o-handler threads handle
182 exclusively level 1 i/o. A dedicated i/o handler thread handles exclusively
183 level 2 i/o. However, if an OS thread does the i/o handling for itself, i.e.,
184 it uses synchronous aio, it can access any pages, as long as it obeys the
185 access order rules. */
186 
188 #define IBUF_POOL_SIZE_PER_MAX_SIZE 2
189 
191 #define IBUF_TABLE_NAME "SYS_IBUF_TABLE"
192 
194 UNIV_INTERN ibuf_use_t ibuf_use = IBUF_USE_ALL;
195 
196 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
197 
198 UNIV_INTERN uint ibuf_debug;
199 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
200 
202 UNIV_INTERN ibuf_t* ibuf = NULL;
203 
205 UNIV_INTERN ulint ibuf_flush_count = 0;
206 
207 #ifdef UNIV_PFS_MUTEX
208 UNIV_INTERN mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key;
209 UNIV_INTERN mysql_pfs_key_t ibuf_mutex_key;
210 UNIV_INTERN mysql_pfs_key_t ibuf_bitmap_mutex_key;
211 #endif /* UNIV_PFS_MUTEX */
212 
213 #ifdef UNIV_IBUF_COUNT_DEBUG
214 
215 #define IBUF_COUNT_N_SPACES 4
216 
217 #define IBUF_COUNT_N_PAGES 130000
218 
220 static ulint ibuf_counts[IBUF_COUNT_N_SPACES][IBUF_COUNT_N_PAGES];
221 
222 /******************************************************************/
224 UNIV_INLINE
225 void
226 ibuf_count_check(
227 /*=============*/
228  ulint space_id,
229  ulint page_no)
230 {
231  if (space_id < IBUF_COUNT_N_SPACES && page_no < IBUF_COUNT_N_PAGES) {
232  return;
233  }
234 
235  fprintf(stderr,
236  "InnoDB: UNIV_IBUF_COUNT_DEBUG limits space_id and page_no\n"
237  "InnoDB: and breaks crash recovery.\n"
238  "InnoDB: space_id=%lu, should be 0<=space_id<%lu\n"
239  "InnoDB: page_no=%lu, should be 0<=page_no<%lu\n",
240  (ulint) space_id, (ulint) IBUF_COUNT_N_SPACES,
241  (ulint) page_no, (ulint) IBUF_COUNT_N_PAGES);
242  ut_error;
243 }
244 #endif
245 
247 /* @{ */
248 #define IBUF_BITMAP_FREE 0
250 #define IBUF_BITMAP_BUFFERED 2
252 #define IBUF_BITMAP_IBUF 3
256 /* @} */
257 
258 /* Various constants for checking the type of an ibuf record and extracting
259 data from it. For details, see the description of the record format at the
260 top of this file. */
261 
265 /* @{ */
266 #define IBUF_REC_INFO_SIZE 4
268 #if IBUF_REC_INFO_SIZE >= DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
269 # error "IBUF_REC_INFO_SIZE >= DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
270 #endif
271 
272 /* Offsets for the fields at the beginning of the fourth field */
273 #define IBUF_REC_OFFSET_COUNTER 0
274 #define IBUF_REC_OFFSET_TYPE 2
275 #define IBUF_REC_OFFSET_FLAGS 3
277 /* Record flag masks */
278 #define IBUF_REC_COMPACT 0x1
285 static mutex_t ibuf_pessimistic_insert_mutex;
286 
288 static mutex_t ibuf_mutex;
289 
291 static mutex_t ibuf_bitmap_mutex;
292 
294 #define IBUF_MERGE_AREA 8
295 
299 #define IBUF_MERGE_THRESHOLD 4
300 
303 #define IBUF_MAX_N_PAGES_MERGED IBUF_MERGE_AREA
304 
308 #define IBUF_CONTRACT_ON_INSERT_NON_SYNC 0
309 
313 #define IBUF_CONTRACT_ON_INSERT_SYNC 5
314 
318 #define IBUF_CONTRACT_DO_NOT_INSERT 10
319 
320 /* TODO: how to cope with drop table if there are records in the insert
321 buffer for the indexes of the table? Is there actually any problem,
322 because ibuf merge is done to a page when it is read in, and it is
323 still physically like the index page even if the index would have been
324 dropped! So, there seems to be no problem. */
325 
326 /******************************************************************/
329 UNIV_INLINE
330 void
331 ibuf_enter(void)
332 /*============*/
333 {
334  ibool* ptr;
335 
337 
338  ut_ad(*ptr == FALSE);
339 
340  *ptr = TRUE;
341 }
342 
343 /******************************************************************/
346 UNIV_INLINE
347 void
348 ibuf_exit(void)
349 /*===========*/
350 {
351  ibool* ptr;
352 
354 
355  ut_ad(*ptr == TRUE);
356 
357  *ptr = FALSE;
358 }
359 
360 /******************************************************************/
367 UNIV_INTERN
368 ibool
369 ibuf_inside(void)
370 /*=============*/
371 {
372  return(*thr_local_get_in_ibuf_field());
373 }
374 
375 /******************************************************************/
378 static
379 page_t*
380 ibuf_header_page_get(
381 /*=================*/
382  mtr_t* mtr)
383 {
384  buf_block_t* block;
385 
386  ut_ad(!ibuf_inside());
387 
388  block = buf_page_get(
389  IBUF_SPACE_ID, 0, FSP_IBUF_HEADER_PAGE_NO, RW_X_LATCH, mtr);
390  buf_block_dbg_add_level(block, SYNC_IBUF_HEADER);
391 
392  return(buf_block_get_frame(block));
393 }
394 
395 /******************************************************************/
398 static
399 page_t*
400 ibuf_tree_root_get(
401 /*===============*/
402  mtr_t* mtr)
403 {
404  buf_block_t* block;
405  page_t* root;
406 
407  ut_ad(ibuf_inside());
408  ut_ad(mutex_own(&ibuf_mutex));
409 
410  mtr_x_lock(dict_index_get_lock(ibuf->index), mtr);
411 
412  block = buf_page_get(
413  IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH, mtr);
414 
415  buf_block_dbg_add_level(block, SYNC_TREE_NODE);
416 
417  root = buf_block_get_frame(block);
418 
419  ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
420  ut_ad(page_get_page_no(root) == FSP_IBUF_TREE_ROOT_PAGE_NO);
421  ut_ad(ibuf->empty == (page_get_n_recs(root) == 0));
422 
423  return(root);
424 }
425 
426 #ifdef UNIV_IBUF_COUNT_DEBUG
427 /******************************************************************/
431 UNIV_INTERN
432 ulint
433 ibuf_count_get(
434 /*===========*/
435  ulint space,
436  ulint page_no)
437 {
438  ibuf_count_check(space, page_no);
439 
440  return(ibuf_counts[space][page_no]);
441 }
442 
443 /******************************************************************/
445 static
446 void
447 ibuf_count_set(
448 /*===========*/
449  ulint space,
450  ulint page_no,
451  ulint val)
452 {
453  ibuf_count_check(space, page_no);
454  ut_a(val < UNIV_PAGE_SIZE);
455 
456  ibuf_counts[space][page_no] = val;
457 }
458 #endif
459 
460 /******************************************************************/
462 UNIV_INTERN
463 void
464 ibuf_close(void)
465 /*============*/
466 {
467  mutex_free(&ibuf_pessimistic_insert_mutex);
468  memset(&ibuf_pessimistic_insert_mutex,
469  0x0, sizeof(ibuf_pessimistic_insert_mutex));
470 
471  mutex_free(&ibuf_mutex);
472  memset(&ibuf_mutex, 0x0, sizeof(ibuf_mutex));
473 
474  mutex_free(&ibuf_bitmap_mutex);
475  memset(&ibuf_bitmap_mutex, 0x0, sizeof(ibuf_mutex));
476 
477  mem_free(ibuf);
478  ibuf = NULL;
479 }
480 
481 /******************************************************************/
484 static
485 void
486 ibuf_size_update(
487 /*=============*/
488  const page_t* root,
489  mtr_t* mtr)
490 {
491  ut_ad(mutex_own(&ibuf_mutex));
492 
493  ibuf->free_list_len = flst_get_len(root + PAGE_HEADER
494  + PAGE_BTR_IBUF_FREE_LIST, mtr);
495 
496  ibuf->height = 1 + btr_page_get_level(root, mtr);
497 
498  /* the '1 +' is the ibuf header page */
499  ibuf->size = ibuf->seg_size - (1 + ibuf->free_list_len);
500 }
501 
502 /******************************************************************/
/** Creates the insert buffer data structure at a database startup and
initializes the data structures for the insert buffer; also registers
the dummy SYS_IBUF_TABLE table and its clustered index in the data
dictionary cache. */
UNIV_INTERN
void
ibuf_init_at_db_start(void)
/*=======================*/
{
	page_t*		root;
	mtr_t		mtr;
	dict_table_t*	table;
	mem_heap_t*	heap;
	dict_index_t*	index;
	ulint		n_used;
	page_t*		header_page;
	ulint		error;

	ibuf = static_cast<ibuf_t *>(mem_alloc(sizeof(ibuf_t)));

	memset(ibuf, 0, sizeof(*ibuf));

	/* Note that also a pessimistic delete can sometimes make a B-tree
	grow in size, as the references on the upper levels of the tree can
	change */

	/* Cap the ibuf size both by a fraction of the buffer pool and by
	the configured srv_ibuf_max_size; write the effective (rounded)
	value back to the server variable. */
	ibuf->max_size = ut_min(buf_pool_get_curr_size() / UNIV_PAGE_SIZE
				/ IBUF_POOL_SIZE_PER_MAX_SIZE,
				srv_ibuf_max_size / UNIV_PAGE_SIZE);
	srv_ibuf_max_size = ibuf->max_size * UNIV_PAGE_SIZE;

	mutex_create(ibuf_pessimistic_insert_mutex_key,
		     &ibuf_pessimistic_insert_mutex,
		     SYNC_IBUF_PESS_INSERT_MUTEX);

	mutex_create(ibuf_mutex_key,
		     &ibuf_mutex, SYNC_IBUF_MUTEX);

	mutex_create(ibuf_bitmap_mutex_key,
		     &ibuf_bitmap_mutex, SYNC_IBUF_BITMAP_MUTEX);

	mtr_start(&mtr);

	mutex_enter(&ibuf_mutex);

	/* X-latch the system tablespace before reading the segment
	header (latching-order rule: fsp latch before ibuf pages). */
	mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, NULL), &mtr);

	header_page = ibuf_header_page_get(&mtr);

	/* Count the pages reserved for the ibuf tree segment. */
	fseg_n_reserved_pages(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
			      &n_used, &mtr);
	ibuf_enter();

	/* At minimum the ibuf header page and the tree root exist. */
	ut_ad(n_used >= 2);

	ibuf->seg_size = n_used;

	{
		buf_block_t*	block;

		block = buf_page_get(
			IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO,
			RW_X_LATCH, &mtr);
		buf_block_dbg_add_level(block, SYNC_TREE_NODE);

		root = buf_block_get_frame(block);
	}

	ibuf_size_update(root, &mtr);
	mutex_exit(&ibuf_mutex);

	ibuf->empty = (page_get_n_recs(root) == 0);
	mtr_commit(&mtr);

	ibuf_exit();

	heap = mem_heap_create(450);

	/* Use old-style record format for the insert buffer. */
	table = dict_mem_table_create(IBUF_TABLE_NAME, IBUF_SPACE_ID, 1, 0);

	dict_mem_table_add_col(table, heap, "DUMMY_COLUMN", DATA_BINARY, 0, 0);

	table->id = DICT_IBUF_ID_MIN + IBUF_SPACE_ID;

	dict_table_add_to_cache(table, heap);
	mem_heap_free(heap);

	index = dict_mem_index_create(
		IBUF_TABLE_NAME, "CLUST_IND",
		IBUF_SPACE_ID, DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, 1);

	dict_mem_index_add_field(index, "DUMMY_COLUMN", 0);

	index->id = DICT_IBUF_ID_MIN + IBUF_SPACE_ID;

	error = dict_index_add_to_cache(table, index,
					FSP_IBUF_TREE_ROOT_PAGE_NO, FALSE);
	ut_a(error == DB_SUCCESS);

	ibuf->index = dict_table_get_first_index(table);
}
603 #endif /* !UNIV_HOTBACKUP */
604 /*********************************************************************/
/** Initializes an ibuf bitmap page: sets the page type and zeroes the
whole bitmap area. */
UNIV_INTERN
void
ibuf_bitmap_page_init(
/*==================*/
	buf_block_t*	block,	/*!< in: bitmap page block */
	mtr_t*		mtr)	/*!< in: mtr */
{
	page_t*	page;
	ulint	byte_offset;
	ulint	zip_size = buf_block_get_zip_size(block);

	/* zip_size must be 0 (uncompressed) or a power of two. */
	ut_a(ut_is_2pow(zip_size));

	page = buf_block_get_frame(block);
	fil_page_set_type(page, FIL_PAGE_IBUF_BITMAP);

	/* Write all zeros to the bitmap */

	if (!zip_size) {
		/* An uncompressed bitmap page describes UNIV_PAGE_SIZE
		pages, IBUF_BITS_PER_PAGE bits each. */
		byte_offset = UT_BITS_IN_BYTES(UNIV_PAGE_SIZE
					       * IBUF_BITS_PER_PAGE);
	} else {
		/* A compressed bitmap page describes zip_size pages. */
		byte_offset = UT_BITS_IN_BYTES(zip_size * IBUF_BITS_PER_PAGE);
	}

	memset(page + IBUF_BITMAP, 0, byte_offset);

	/* The remaining area (up to the page trailer) is uninitialized. */

#ifndef UNIV_HOTBACKUP
	/* NOTE(review): the upstream source has a redo-logging statement
	in this region that appears to be missing from this copy --
	confirm against the original ibuf0ibuf.cc. */
#endif /* !UNIV_HOTBACKUP */
}
639 
640 /*********************************************************************/
643 UNIV_INTERN
644 byte*
645 ibuf_parse_bitmap_init(
646 /*===================*/
647  byte* ptr,
648  byte* /*end_ptr __attribute__((unused))*/,
649  buf_block_t* block,
650  mtr_t* mtr)
651 {
652  ut_ad(ptr && end_ptr);
653 
654  if (block) {
655  ibuf_bitmap_page_init(block, mtr);
656  }
657 
658  return(ptr);
659 }
660 #ifndef UNIV_HOTBACKUP
661 /********************************************************************/
664 UNIV_INLINE
665 ulint
666 ibuf_bitmap_page_get_bits(
667 /*======================*/
668  const page_t* page,
669  ulint page_no,
670  ulint zip_size,
672  ulint bit,
674  mtr_t* /*mtr __attribute__((unused))*/)
677 {
678  ulint byte_offset;
679  ulint bit_offset;
680  ulint map_byte;
681  ulint value;
682 
683  ut_ad(bit < IBUF_BITS_PER_PAGE);
684 #if IBUF_BITS_PER_PAGE % 2
685 # error "IBUF_BITS_PER_PAGE % 2 != 0"
686 #endif
687  ut_ad(ut_is_2pow(zip_size));
688  ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
689 
690  if (!zip_size) {
691  bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE
692  + bit;
693  } else {
694  bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE
695  + bit;
696  }
697 
698  byte_offset = bit_offset / 8;
699  bit_offset = bit_offset % 8;
700 
701  ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
702 
703  map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
704 
705  value = ut_bit_get_nth(map_byte, bit_offset);
706 
707  if (bit == IBUF_BITMAP_FREE) {
708  ut_ad(bit_offset + 1 < 8);
709 
710  value = value * 2 + ut_bit_get_nth(map_byte, bit_offset + 1);
711  }
712 
713  return(value);
714 }
715 
716 /********************************************************************/
718 static
719 void
720 ibuf_bitmap_page_set_bits(
721 /*======================*/
722  page_t* page,
723  ulint page_no,
724  ulint zip_size,
726  ulint bit,
727  ulint val,
728  mtr_t* mtr)
729 {
730  ulint byte_offset;
731  ulint bit_offset;
732  ulint map_byte;
733 
734  ut_ad(bit < IBUF_BITS_PER_PAGE);
735 #if IBUF_BITS_PER_PAGE % 2
736 # error "IBUF_BITS_PER_PAGE % 2 != 0"
737 #endif
738  ut_ad(ut_is_2pow(zip_size));
739  ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
740 #ifdef UNIV_IBUF_COUNT_DEBUG
741  ut_a((bit != IBUF_BITMAP_BUFFERED) || (val != FALSE)
742  || (0 == ibuf_count_get(page_get_space_id(page),
743  page_no)));
744 #endif
745  if (!zip_size) {
746  bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE
747  + bit;
748  } else {
749  bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE
750  + bit;
751  }
752 
753  byte_offset = bit_offset / 8;
754  bit_offset = bit_offset % 8;
755 
756  ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
757 
758  map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
759 
760  if (bit == IBUF_BITMAP_FREE) {
761  ut_ad(bit_offset + 1 < 8);
762  ut_ad(val <= 3);
763 
764  map_byte = ut_bit_set_nth(map_byte, bit_offset, val / 2);
765  map_byte = ut_bit_set_nth(map_byte, bit_offset + 1, val % 2);
766  } else {
767  ut_ad(val <= 1);
768  map_byte = ut_bit_set_nth(map_byte, bit_offset, val);
769  }
770 
771  mlog_write_ulint(page + IBUF_BITMAP + byte_offset, map_byte,
772  MLOG_1BYTE, mtr);
773 }
774 
775 /********************************************************************/
778 UNIV_INLINE
779 ulint
780 ibuf_bitmap_page_no_calc(
781 /*=====================*/
782  ulint zip_size,
784  ulint page_no)
785 {
786  ut_ad(ut_is_2pow(zip_size));
787 
788  if (!zip_size) {
789  return(FSP_IBUF_BITMAP_OFFSET
790  + (page_no & ~(UNIV_PAGE_SIZE - 1)));
791  } else {
792  return(FSP_IBUF_BITMAP_OFFSET
793  + (page_no & ~(zip_size - 1)));
794  }
795 }
796 
797 /********************************************************************/
803 static
804 page_t*
805 ibuf_bitmap_get_map_page_func(
806 /*==========================*/
807  ulint space,
808  ulint page_no,
809  ulint zip_size,
811  const char* file,
812  ulint line,
813  mtr_t* mtr)
814 {
815  buf_block_t* block;
816 
817  block = buf_page_get_gen(space, zip_size,
818  ibuf_bitmap_page_no_calc(zip_size, page_no),
819  RW_X_LATCH, NULL, BUF_GET,
820  file, line, mtr);
821  buf_block_dbg_add_level(block, SYNC_IBUF_BITMAP);
822 
823  return(buf_block_get_frame(block));
824 }
825 
826 /********************************************************************/
836 #define ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr) \
837  ibuf_bitmap_get_map_page_func(space, page_no, zip_size, \
838  __FILE__, __LINE__, mtr)
839 
840 /************************************************************************/
845 UNIV_INLINE
846 void
847 ibuf_set_free_bits_low(
848 /*===================*/
849  ulint zip_size,
851  const buf_block_t* block,
854  ulint val,
855  mtr_t* mtr)
856 {
857  page_t* bitmap_page;
858  ulint space;
859  ulint page_no;
860 
861  if (!page_is_leaf(buf_block_get_frame(block))) {
862 
863  return;
864  }
865 
866  space = buf_block_get_space(block);
867  page_no = buf_block_get_page_no(block);
868  bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
869 #ifdef UNIV_IBUF_DEBUG
870 # if 0
871  fprintf(stderr,
872  "Setting space %lu page %lu free bits to %lu should be %lu\n",
873  space, page_no, val,
874  ibuf_index_page_calc_free(zip_size, block));
875 # endif
876 
877  ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
878 #endif /* UNIV_IBUF_DEBUG */
879  ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
880  IBUF_BITMAP_FREE, val, mtr);
881 }
882 
883 /************************************************************************/
/** Sets the free bits of the page in the ibuf bitmap. This is done in
a separate mini-transaction, hence this operation does not restrict
further work to only ibuf bitmap operations, which would result if the
latch to the bitmap page were kept. */
UNIV_INTERN
void
ibuf_set_free_bits_func(
/*====================*/
	buf_block_t*	block,	/*!< in: index page; free bits are set
				only if it is a leaf page */
#ifdef UNIV_IBUF_DEBUG
	ulint		max_val,/*!< in: ULINT_UNDEFINED or a maximum
				value which the free bits must not
				exceed before setting; for debugging */
#endif /* UNIV_IBUF_DEBUG */
	ulint		val)	/*!< in: value to set: < 4 */
{
	mtr_t	mtr;
	page_t*	page;
	page_t*	bitmap_page;
	ulint	space;
	ulint	page_no;
	ulint	zip_size;

	page = buf_block_get_frame(block);

	/* Only leaf pages carry free-space bits in the bitmap. */
	if (!page_is_leaf(page)) {

		return;
	}

	/* Use a private mini-transaction so the bitmap page latch is
	released immediately after the write. */
	mtr_start(&mtr);

	space = buf_block_get_space(block);
	page_no = buf_block_get_page_no(block);
	zip_size = buf_block_get_zip_size(block);
	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, &mtr);

#ifdef UNIV_IBUF_DEBUG
	if (max_val != ULINT_UNDEFINED) {
		ulint	old_val;

		old_val = ibuf_bitmap_page_get_bits(
			bitmap_page, page_no, zip_size,
			IBUF_BITMAP_FREE, &mtr);
# if 0
		if (old_val != max_val) {
			fprintf(stderr,
				"Ibuf: page %lu old val %lu max val %lu\n",
				page_get_page_no(page),
				old_val, max_val);
		}
# endif

		ut_a(old_val <= max_val);
	}
# if 0
	fprintf(stderr, "Setting page no %lu free bits to %lu should be %lu\n",
		page_get_page_no(page), val,
		ibuf_index_page_calc_free(zip_size, block));
# endif

	/* The free bits must never exceed the actual free space. */
	ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
#endif /* UNIV_IBUF_DEBUG */
	ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
				  IBUF_BITMAP_FREE, val, &mtr);
	mtr_commit(&mtr);
}
952 
953 /************************************************************************/
/** Resets the free bits of the page in the ibuf bitmap to zero.  This
is done in a separate mini-transaction (see ibuf_set_free_bits), so no
bitmap page latch is held afterwards.  Passing ULINT_UNDEFINED
disables the debug-build maximum-value check. */
UNIV_INTERN
void
ibuf_reset_free_bits(
/*=================*/
	buf_block_t*	block)	/*!< in: index page whose free bits are
				reset to 0 */
{
	ibuf_set_free_bits(block, 0, ULINT_UNDEFINED);
}
972 
973 /**********************************************************************/
981 UNIV_INTERN
982 void
983 ibuf_update_free_bits_low(
984 /*======================*/
985  const buf_block_t* block,
986  ulint max_ins_size,
991  mtr_t* mtr)
992 {
993  ulint before;
994  ulint after;
995 
996  ut_a(!buf_block_get_page_zip(block));
997 
998  before = ibuf_index_page_calc_free_bits(0, max_ins_size);
999 
1000  after = ibuf_index_page_calc_free(0, block);
1001 
1002  /* This approach cannot be used on compressed pages, since the
1003  computed value of "before" often does not match the current
1004  state of the bitmap. This is because the free space may
1005  increase or decrease when a compressed page is reorganized. */
1006  if (before != after) {
1007  ibuf_set_free_bits_low(0, block, after, mtr);
1008  }
1009 }
1010 
1011 /**********************************************************************/
1019 UNIV_INTERN
1020 void
1021 ibuf_update_free_bits_zip(
1022 /*======================*/
1023  buf_block_t* block,
1024  mtr_t* mtr)
1025 {
1026  page_t* bitmap_page;
1027  ulint space;
1028  ulint page_no;
1029  ulint zip_size;
1030  ulint after;
1031 
1032  space = buf_block_get_space(block);
1033  page_no = buf_block_get_page_no(block);
1034  zip_size = buf_block_get_zip_size(block);
1035 
1036  ut_a(page_is_leaf(buf_block_get_frame(block)));
1037  ut_a(zip_size);
1038 
1039  bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
1040 
1041  after = ibuf_index_page_calc_free_zip(zip_size, block);
1042 
1043  if (after == 0) {
1044  /* We move the page to the front of the buffer pool LRU list:
1045  the purpose of this is to prevent those pages to which we
1046  cannot make inserts using the insert buffer from slipping
1047  out of the buffer pool */
1048 
1049  buf_page_make_young(&block->page);
1050  }
1051 
1052  ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
1053  IBUF_BITMAP_FREE, after, mtr);
1054 }
1055 
1056 /**********************************************************************/
1063 UNIV_INTERN
1064 void
1065 ibuf_update_free_bits_for_two_pages_low(
1066 /*====================================*/
1067  ulint zip_size,
1069  buf_block_t* block1,
1070  buf_block_t* block2,
1071  mtr_t* mtr)
1072 {
1073  ulint state;
1074 
1075  /* As we have to x-latch two random bitmap pages, we have to acquire
1076  the bitmap mutex to prevent a deadlock with a similar operation
1077  performed by another OS thread. */
1078 
1079  mutex_enter(&ibuf_bitmap_mutex);
1080 
1081  state = ibuf_index_page_calc_free(zip_size, block1);
1082 
1083  ibuf_set_free_bits_low(zip_size, block1, state, mtr);
1084 
1085  state = ibuf_index_page_calc_free(zip_size, block2);
1086 
1087  ibuf_set_free_bits_low(zip_size, block2, state, mtr);
1088 
1089  mutex_exit(&ibuf_bitmap_mutex);
1090 }
1091 
1092 /**********************************************************************/
1095 UNIV_INLINE
1096 ibool
1097 ibuf_fixed_addr_page(
1098 /*=================*/
1099  ulint space,
1100  ulint zip_size,
1102  ulint page_no)
1103 {
1104  return((space == IBUF_SPACE_ID && page_no == IBUF_TREE_ROOT_PAGE_NO)
1105  || ibuf_bitmap_page(zip_size, page_no));
1106 }
1107 
1108 /***********************************************************************/
1112 UNIV_INTERN
1113 ibool
1114 ibuf_page(
1115 /*======*/
1116  ulint space,
1117  ulint zip_size,
1118  ulint page_no,
1119  mtr_t* mtr)
1123 {
1124  ibool ret;
1125  mtr_t local_mtr;
1126  page_t* bitmap_page;
1127 
1129 
1130  if (srv_fake_write)
1131  return(FALSE);
1132 
1133  if (ibuf_fixed_addr_page(space, zip_size, page_no)) {
1134 
1135  return(TRUE);
1136  } else if (space != IBUF_SPACE_ID) {
1137 
1138  return(FALSE);
1139  }
1140 
1141  ut_ad(fil_space_get_type(IBUF_SPACE_ID) == FIL_TABLESPACE);
1142 
1143  if (mtr == NULL) {
1144  mtr = &local_mtr;
1145  mtr_start(mtr);
1146  }
1147 
1148  bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
1149 
1150  ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
1151  IBUF_BITMAP_IBUF, mtr);
1152 
1153  if (mtr == &local_mtr) {
1154  mtr_commit(mtr);
1155  }
1156 
1157  return(ret);
1158 }
1159 
1160 /********************************************************************/
1163 static
1164 ulint
1165 ibuf_rec_get_page_no(
1166 /*=================*/
1167  const rec_t* rec)
1168 {
1169  const byte* field;
1170  ulint len;
1171 
1172  ut_ad(ibuf_inside());
1173  ut_ad(rec_get_n_fields_old(rec) > 2);
1174 
1175  field = rec_get_nth_field_old(rec, 1, &len);
1176 
1177  if (len == 1) {
1178  /* This is of the >= 4.1.x record format */
1179  ut_a(trx_sys_multiple_tablespace_format);
1180 
1181  field = rec_get_nth_field_old(rec, 2, &len);
1182  } else {
1183  ut_a(trx_doublewrite_must_reset_space_ids);
1184  ut_a(!trx_sys_multiple_tablespace_format);
1185 
1186  field = rec_get_nth_field_old(rec, 0, &len);
1187  }
1188 
1189  ut_a(len == 4);
1190 
1191  return(mach_read_from_4(field));
1192 }
1193 
1194 /********************************************************************/
1198 static
1199 ulint
1200 ibuf_rec_get_space(
1201 /*===============*/
1202  const rec_t* rec)
1203 {
1204  const byte* field;
1205  ulint len;
1206 
1207  ut_ad(ibuf_inside());
1208  ut_ad(rec_get_n_fields_old(rec) > 2);
1209 
1210  field = rec_get_nth_field_old(rec, 1, &len);
1211 
1212  if (len == 1) {
1213  /* This is of the >= 4.1.x record format */
1214 
1215  ut_a(trx_sys_multiple_tablespace_format);
1216  field = rec_get_nth_field_old(rec, 0, &len);
1217  ut_a(len == 4);
1218 
1219  return(mach_read_from_4(field));
1220  }
1221 
1222  ut_a(trx_doublewrite_must_reset_space_ids);
1223  ut_a(!trx_sys_multiple_tablespace_format);
1224 
1225  return(0);
1226 }
1227 
1228 /****************************************************************/
/** Parses the info bytes of an ibuf record: the operation type, the
compact-format flag, the length of the info prefix, and the per-page
counter.  Any output pointer may be NULL if the caller does not need
that value. */
static
void
ibuf_rec_get_info(
/*==============*/
	const rec_t*	rec,		/*!< in: ibuf record */
	ibuf_op_t*	op,		/*!< out: operation type, or NULL */
	ibool*		comp,		/*!< out: TRUE if compact record
					format, or NULL */
	ulint*		info_len,	/*!< out: length of info fields at
					the start of the fourth field,
					or NULL */
	ulint*		counter)	/*!< out: counter value if the
					record has one, or NULL */
{
	const byte*	types;
	ulint		fields;
	ulint		len;

	/* Local variables to shadow arguments. */
	ibuf_op_t	op_local;
	ibool		comp_local;
	ulint		info_len_local;
	ulint		counter_local;

	ut_ad(ibuf_inside());
	fields = rec_get_n_fields_old(rec);
	ut_a(fields > 4);

	types = rec_get_nth_field_old(rec, 3, &len);

	/* The length of the type-info field modulo the per-column type
	buffer size identifies the record format (see the format
	description at the top of this file). */
	info_len_local = len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;

	switch (info_len_local) {
	case 0:
	case 1:
		/* Pre-5.5 record: no counter/type/flags prefix; a
		1-byte remainder is the >= 5.0.3 compact-format marker. */
		op_local = IBUF_OP_INSERT;
		comp_local = info_len_local;
		ut_ad(!counter);
		counter_local = ULINT_UNDEFINED;
		break;

	case IBUF_REC_INFO_SIZE:
		/* >= 5.5 record: 2-byte counter, 1-byte operation type,
		1-byte flags. */
		op_local = (ibuf_op_t)types[IBUF_REC_OFFSET_TYPE];
		comp_local = types[IBUF_REC_OFFSET_FLAGS] & IBUF_REC_COMPACT;
		counter_local = mach_read_from_2(
			types + IBUF_REC_OFFSET_COUNTER);
		break;

	default:
		ut_error;
	}

	ut_a(op_local < IBUF_OP_COUNT);
	/* The rest of the field holds one type buffer per user column
	(the first 4 fields of the ibuf record are bookkeeping). */
	ut_a((len - info_len_local) ==
	     (fields - 4) * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);

	/* Copy out only the values the caller asked for. */
	if (op) {
		*op = op_local;
	}

	if (comp) {
		*comp = comp_local;
	}

	if (info_len) {
		*info_len = info_len_local;
	}

	if (counter) {
		*counter = counter_local;
	}
}
1300 
1301 /****************************************************************/
1304 static
1305 ibuf_op_t
1306 ibuf_rec_get_op_type(
1307 /*=================*/
1308  const rec_t* rec)
1309 {
1310  ulint len;
1311 
1312  ut_ad(ibuf_inside());
1313  ut_ad(rec_get_n_fields_old(rec) > 2);
1314 
1315  (void) rec_get_nth_field_old(rec, 1, &len);
1316 
1317  if (len > 1) {
1318  /* This is a < 4.1.x format record */
1319 
1320  return(IBUF_OP_INSERT);
1321  } else {
1322  ibuf_op_t op;
1323 
1324  ibuf_rec_get_info(rec, &op, NULL, NULL, NULL);
1325 
1326  return(op);
1327  }
1328 }
1329 
1330 /****************************************************************/
1335 UNIV_INTERN
1336 ulint
1337 ibuf_rec_get_counter(
1338 /*=================*/
1339  const rec_t* rec)
1340 {
1341  const byte* ptr;
1342  ulint len;
1343 
1344  if (rec_get_n_fields_old(rec) < 4) {
1345 
1346  return(ULINT_UNDEFINED);
1347  }
1348 
1349  ptr = rec_get_nth_field_old(rec, 3, &len);
1350 
1351  if (len >= 2) {
1352 
1353  return(mach_read_from_2(ptr));
1354  } else {
1355 
1356  return(ULINT_UNDEFINED);
1357  }
1358 }
1359 
1360 /****************************************************************/
1363 static
1364 void
1365 ibuf_add_ops(
1366 /*=========*/
1367  ulint* arr,
1368  const ulint* ops)
1370 {
1371  ulint i;
1372 
1373 #ifndef HAVE_ATOMIC_BUILTINS
1374  ut_ad(mutex_own(&ibuf_mutex));
1375 #endif /* !HAVE_ATOMIC_BUILTINS */
1376 
1377  for (i = 0; i < IBUF_OP_COUNT; i++) {
1378 #ifdef HAVE_ATOMIC_BUILTINS
1379  os_atomic_increment_ulint(&arr[i], ops[i]);
1380 #else /* HAVE_ATOMIC_BUILTINS */
1381  arr[i] += ops[i];
1382 #endif /* HAVE_ATOMIC_BUILTINS */
1383  }
1384 }
1385 
1386 /****************************************************************/
1388 static
1389 void
1390 ibuf_print_ops(
1391 /*===========*/
1392  const ulint* ops,
1393  FILE* file)
1394 {
1395  static const char* op_names[] = {
1396  "insert",
1397  "delete mark",
1398  "delete"
1399  };
1400  ulint i;
1401 
1402  ut_a(UT_ARR_SIZE(op_names) == IBUF_OP_COUNT);
1403 
1404  for (i = 0; i < IBUF_OP_COUNT; i++) {
1405  fprintf(file, "%s %lu%s", op_names[i],
1406  (ulong) ops[i], (i < (IBUF_OP_COUNT - 1)) ? ", " : "");
1407  }
1408 
1409  putc('\n', file);
1410 }
1411 
1412 /********************************************************************/
1415 static
1416 dict_index_t*
1417 ibuf_dummy_index_create(
1418 /*====================*/
1419  ulint n,
1420  ibool comp)
1421 {
1422  dict_table_t* table;
1423  dict_index_t* index;
1424 
1425  table = dict_mem_table_create("IBUF_DUMMY",
1426  DICT_HDR_SPACE, n,
1427  comp ? DICT_TF_COMPACT : 0);
1428 
1429  index = dict_mem_index_create("IBUF_DUMMY", "IBUF_DUMMY",
1430  DICT_HDR_SPACE, 0, n);
1431 
1432  index->table = table;
1433 
1434  /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
1435  index->cached = TRUE;
1436 
1437  return(index);
1438 }
1439 /********************************************************************/
1441 static
1442 void
1443 ibuf_dummy_index_add_col(
1444 /*=====================*/
1445  dict_index_t* index,
1446  const dtype_t* type,
1447  ulint len)
1448 {
1449  ulint i = index->table->n_def;
1450  dict_mem_table_add_col(index->table, NULL, NULL,
1451  dtype_get_mtype(type),
1452  dtype_get_prtype(type),
1453  dtype_get_len(type));
1454  dict_index_add_col(index, index->table,
1455  dict_table_get_nth_col(index->table, i), len);
1456 }
1457 /********************************************************************/
1459 static
1460 void
1461 ibuf_dummy_index_free(
1462 /*==================*/
1463  dict_index_t* index)
1464 {
1465  dict_table_t* table = index->table;
1466 
1467  dict_mem_index_free(index);
1468  dict_mem_table_free(table);
1469 }
1470 
1471 /*********************************************************************/
1479 UNIV_INLINE
1480 dtuple_t*
1481 ibuf_build_entry_pre_4_1_x(
1482 /*=======================*/
1483  const rec_t* ibuf_rec,
1484  mem_heap_t* heap,
1485  dict_index_t** pindex)
1487 {
1488  ulint i;
1489  ulint len;
1490  const byte* types;
1491  dtuple_t* tuple;
1492  ulint n_fields;
1493 
1494  ut_a(trx_doublewrite_must_reset_space_ids);
1495  ut_a(!trx_sys_multiple_tablespace_format);
1496 
1497  n_fields = rec_get_n_fields_old(ibuf_rec) - 2;
1498  tuple = dtuple_create(heap, n_fields);
1499  types = rec_get_nth_field_old(ibuf_rec, 1, &len);
1500 
1501  ut_a(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
1502 
1503  for (i = 0; i < n_fields; i++) {
1504  const byte* data;
1505  dfield_t* field;
1506 
1507  field = dtuple_get_nth_field(tuple, i);
1508 
1509  data = rec_get_nth_field_old(ibuf_rec, i + 2, &len);
1510 
1511  dfield_set_data(field, data, len);
1512 
1514  dfield_get_type(field),
1515  types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE);
1516  }
1517 
1518  *pindex = ibuf_dummy_index_create(n_fields, FALSE);
1519 
1520  return(tuple);
1521 }
1522 
1523 /*********************************************************************/
1539 static
1540 dtuple_t*
1541 ibuf_build_entry_from_ibuf_rec(
1542 /*===========================*/
1543  const rec_t* ibuf_rec,
1544  mem_heap_t* heap,
1545  dict_index_t** pindex)
1547 {
1548  dtuple_t* tuple;
1549  dfield_t* field;
1550  ulint n_fields;
1551  const byte* types;
1552  const byte* data;
1553  ulint len;
1554  ulint info_len;
1555  ulint i;
1556  ulint comp;
1557  dict_index_t* index;
1558 
1559  data = rec_get_nth_field_old(ibuf_rec, 1, &len);
1560 
1561  if (len > 1) {
1562  /* This a < 4.1.x format record */
1563 
1564  return(ibuf_build_entry_pre_4_1_x(ibuf_rec, heap, pindex));
1565  }
1566 
1567  /* This a >= 4.1.x format record */
1568 
1569  ut_a(trx_sys_multiple_tablespace_format);
1570  ut_a(*data == 0);
1571  ut_a(rec_get_n_fields_old(ibuf_rec) > 4);
1572 
1573  n_fields = rec_get_n_fields_old(ibuf_rec) - 4;
1574 
1575  tuple = dtuple_create(heap, n_fields);
1576 
1577  types = rec_get_nth_field_old(ibuf_rec, 3, &len);
1578 
1579  ibuf_rec_get_info(ibuf_rec, NULL, &comp, &info_len, NULL);
1580 
1581  index = ibuf_dummy_index_create(n_fields, comp);
1582 
1583  len -= info_len;
1584  types += info_len;
1585 
1586  ut_a(len == n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1587 
1588  for (i = 0; i < n_fields; i++) {
1589  field = dtuple_get_nth_field(tuple, i);
1590 
1591  data = rec_get_nth_field_old(ibuf_rec, i + 4, &len);
1592 
1593  dfield_set_data(field, data, len);
1594 
1596  dfield_get_type(field),
1597  types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1598 
1599  ibuf_dummy_index_add_col(index, dfield_get_type(field), len);
1600  }
1601 
1602  /* Prevent an ut_ad() failure in page_zip_write_rec() by
1603  adding system columns to the dummy table pointed to by the
1604  dummy secondary index. The insert buffer is only used for
1605  secondary indexes, whose records never contain any system
1606  columns, such as DB_TRX_ID. */
1607  ut_d(dict_table_add_system_columns(index->table, index->table->heap));
1608 
1609  *pindex = index;
1610 
1611  return(tuple);
1612 }
1613 
1614 /******************************************************************/
1617 UNIV_INLINE
1618 ulint
1619 ibuf_rec_get_size(
1620 /*==============*/
1621  const rec_t* rec,
1622  const byte* types,
1623  ulint n_fields,
1624  ibool pre_4_1,
1626  ulint comp)
1628 {
1629  ulint i;
1630  ulint field_offset;
1631  ulint types_offset;
1632  ulint size = 0;
1633 
1634  if (pre_4_1) {
1635  field_offset = 2;
1636  types_offset = DATA_ORDER_NULL_TYPE_BUF_SIZE;
1637  } else {
1638  field_offset = 4;
1639  types_offset = DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
1640  }
1641 
1642  for (i = 0; i < n_fields; i++) {
1643  ulint len;
1644  dtype_t dtype;
1645 
1646  rec_get_nth_field_offs_old(rec, i + field_offset, &len);
1647 
1648  if (len != UNIV_SQL_NULL) {
1649  size += len;
1650  } else if (pre_4_1) {
1651  dtype_read_for_order_and_null_size(&dtype, types);
1652 
1653  size += dtype_get_sql_null_size(&dtype, comp);
1654  } else {
1656 
1657  size += dtype_get_sql_null_size(&dtype, comp);
1658  }
1659 
1660  types += types_offset;
1661  }
1662 
1663  return(size);
1664 }
1665 
1666 /********************************************************************/
1671 static
1672 ulint
1673 ibuf_rec_get_volume(
1674 /*================*/
1675  const rec_t* ibuf_rec)
1676 {
1677  ulint len;
1678  const byte* data;
1679  const byte* types;
1680  ulint n_fields;
1681  ulint data_size;
1682  ibool pre_4_1;
1683  ulint comp;
1684 
1685  ut_ad(ibuf_inside());
1686  ut_ad(rec_get_n_fields_old(ibuf_rec) > 2);
1687 
1688  data = rec_get_nth_field_old(ibuf_rec, 1, &len);
1689  pre_4_1 = (len > 1);
1690 
1691  if (pre_4_1) {
1692  /* < 4.1.x format record */
1693 
1694  ut_a(trx_doublewrite_must_reset_space_ids);
1695  ut_a(!trx_sys_multiple_tablespace_format);
1696 
1697  n_fields = rec_get_n_fields_old(ibuf_rec) - 2;
1698 
1699  types = rec_get_nth_field_old(ibuf_rec, 1, &len);
1700 
1701  ut_ad(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
1702  comp = 0;
1703  } else {
1704  /* >= 4.1.x format record */
1705  ibuf_op_t op;
1706  ulint info_len;
1707 
1708  ut_a(trx_sys_multiple_tablespace_format);
1709  ut_a(*data == 0);
1710 
1711  types = rec_get_nth_field_old(ibuf_rec, 3, &len);
1712 
1713  ibuf_rec_get_info(ibuf_rec, &op, &comp, &info_len, NULL);
1714 
1715  if (op == IBUF_OP_DELETE_MARK || op == IBUF_OP_DELETE) {
1716  /* Delete-marking a record doesn't take any
1717  additional space, and while deleting a record
1718  actually frees up space, we have to play it safe and
1719  pretend it takes no additional space (the record
1720  might not exist, etc.). */
1721 
1722  return(0);
1723  } else if (comp) {
1724  dtuple_t* entry;
1725  ulint volume;
1726  dict_index_t* dummy_index;
1727  mem_heap_t* heap = mem_heap_create(500);
1728 
1729  entry = ibuf_build_entry_from_ibuf_rec(
1730  ibuf_rec, heap, &dummy_index);
1731 
1732  volume = rec_get_converted_size(dummy_index, entry, 0);
1733 
1734  ibuf_dummy_index_free(dummy_index);
1735  mem_heap_free(heap);
1736 
1737  return(volume + page_dir_calc_reserved_space(1));
1738  }
1739 
1740  types += info_len;
1741  n_fields = rec_get_n_fields_old(ibuf_rec) - 4;
1742  }
1743 
1744  data_size = ibuf_rec_get_size(ibuf_rec, types, n_fields, pre_4_1, comp);
1745 
1746  return(data_size + rec_get_converted_extra_size(data_size, n_fields, 0)
1748 }
1749 
1750 /*********************************************************************/
1758 static
1759 dtuple_t*
1760 ibuf_entry_build(
1761 /*=============*/
1762  ibuf_op_t op,
1763  dict_index_t* index,
1764  const dtuple_t* entry,
1765  ulint space,
1766  ulint page_no,
1768  ulint counter,
1770  mem_heap_t* heap)
1771 {
1772  dtuple_t* tuple;
1773  dfield_t* field;
1774  const dfield_t* entry_field;
1775  ulint n_fields;
1776  byte* buf;
1777  byte* ti;
1778  byte* type_info;
1779  ulint i;
1780 
1781  ut_ad(counter != ULINT_UNDEFINED || op == IBUF_OP_INSERT);
1782  ut_ad(counter == ULINT_UNDEFINED || counter <= 0xFFFF);
1783  ut_ad(op < IBUF_OP_COUNT);
1784 
1785  /* We have to build a tuple with the following fields:
1786 
1787  1-4) These are described at the top of this file.
1788 
1789  5) The rest of the fields are copied from the entry.
1790 
1791  All fields in the tuple are ordered like the type binary in our
1792  insert buffer tree. */
1793 
1794  n_fields = dtuple_get_n_fields(entry);
1795 
1796  tuple = dtuple_create(heap, n_fields + 4);
1797 
1798  /* 1) Space Id */
1799 
1800  field = dtuple_get_nth_field(tuple, 0);
1801 
1802  buf = static_cast<byte *>(mem_heap_alloc(heap, 4));
1803 
1804  mach_write_to_4(buf, space);
1805 
1806  dfield_set_data(field, buf, 4);
1807 
1808  /* 2) Marker byte */
1809 
1810  field = dtuple_get_nth_field(tuple, 1);
1811 
1812  buf = static_cast<byte *>(mem_heap_alloc(heap, 1));
1813 
1814  /* We set the marker byte zero */
1815 
1816  mach_write_to_1(buf, 0);
1817 
1818  dfield_set_data(field, buf, 1);
1819 
1820  /* 3) Page number */
1821 
1822  field = dtuple_get_nth_field(tuple, 2);
1823 
1824  buf = static_cast<byte *>(mem_heap_alloc(heap, 4));
1825 
1826  mach_write_to_4(buf, page_no);
1827 
1828  dfield_set_data(field, buf, 4);
1829 
1830  /* 4) Type info, part #1 */
1831 
1832  if (counter == ULINT_UNDEFINED) {
1833  i = dict_table_is_comp(index->table) ? 1 : 0;
1834  } else {
1835  ut_ad(counter <= 0xFFFF);
1836  i = IBUF_REC_INFO_SIZE;
1837  }
1838 
1839  ti = type_info = static_cast<byte *>(mem_heap_alloc(heap, i + n_fields
1840  * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE));
1841 
1842  switch (i) {
1843  default:
1844  ut_error;
1845  break;
1846  case 1:
1847  /* set the flag for ROW_FORMAT=COMPACT */
1848  *ti++ = 0;
1849  /* fall through */
1850  case 0:
1851  /* the old format does not allow delete buffering */
1852  ut_ad(op == IBUF_OP_INSERT);
1853  break;
1854  case IBUF_REC_INFO_SIZE:
1855  mach_write_to_2(ti + IBUF_REC_OFFSET_COUNTER, counter);
1856 
1857  ti[IBUF_REC_OFFSET_TYPE] = (byte) op;
1858  ti[IBUF_REC_OFFSET_FLAGS] = dict_table_is_comp(index->table)
1859  ? IBUF_REC_COMPACT : 0;
1860  ti += IBUF_REC_INFO_SIZE;
1861  break;
1862  }
1863 
1864  /* 5+) Fields from the entry */
1865 
1866  for (i = 0; i < n_fields; i++) {
1867  ulint fixed_len;
1868  const dict_field_t* ifield;
1869 
1870  /* We add 4 below because we have the 4 extra fields at the
1871  start of an ibuf record */
1872 
1873  field = dtuple_get_nth_field(tuple, i + 4);
1874  entry_field = dtuple_get_nth_field(entry, i);
1875  dfield_copy(field, entry_field);
1876 
1877  ifield = dict_index_get_nth_field(index, i);
1878  /* Prefix index columns of fixed-length columns are of
1879  fixed length. However, in the function call below,
1880  dfield_get_type(entry_field) contains the fixed length
1881  of the column in the clustered index. Replace it with
1882  the fixed length of the secondary index column. */
1883  fixed_len = ifield->fixed_len;
1884 
1885 #ifdef UNIV_DEBUG
1886  if (fixed_len) {
1887  /* dict_index_add_col() should guarantee these */
1888  ut_ad(fixed_len <= (ulint)
1889  dfield_get_type(entry_field)->len);
1890  if (ifield->prefix_len) {
1891  ut_ad(ifield->prefix_len == fixed_len);
1892  } else {
1893  ut_ad(fixed_len == (ulint)
1894  dfield_get_type(entry_field)->len);
1895  }
1896  }
1897 #endif /* UNIV_DEBUG */
1898 
1900  ti, dfield_get_type(entry_field), fixed_len);
1901  ti += DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
1902  }
1903 
1904  /* 4) Type info, part #2 */
1905 
1906  field = dtuple_get_nth_field(tuple, 3);
1907 
1908  dfield_set_data(field, type_info, ti - type_info);
1909 
1910  /* Set all the types in the new tuple binary */
1911 
1912  dtuple_set_types_binary(tuple, n_fields + 4);
1913 
1914  return(tuple);
1915 }
1916 
1917 /*********************************************************************/
1921 static
1922 dtuple_t*
1923 ibuf_search_tuple_build(
1924 /*====================*/
1925  ulint space,
1926  ulint page_no,
1927  mem_heap_t* heap)
1928 {
1929  dtuple_t* tuple;
1930  dfield_t* field;
1931  byte* buf;
1932 
1933  ut_a(space == 0);
1934  ut_a(trx_doublewrite_must_reset_space_ids);
1935  ut_a(!trx_sys_multiple_tablespace_format);
1936 
1937  tuple = dtuple_create(heap, 1);
1938 
1939  /* Store the page number in tuple */
1940 
1941  field = dtuple_get_nth_field(tuple, 0);
1942 
1943  buf = static_cast<byte *>(mem_heap_alloc(heap, 4));
1944 
1945  mach_write_to_4(buf, page_no);
1946 
1947  dfield_set_data(field, buf, 4);
1948 
1949  dtuple_set_types_binary(tuple, 1);
1950 
1951  return(tuple);
1952 }
1953 
1954 /*********************************************************************/
1958 static
1959 dtuple_t*
1960 ibuf_new_search_tuple_build(
1961 /*========================*/
1962  ulint space,
1963  ulint page_no,
1964  mem_heap_t* heap)
1965 {
1966  dtuple_t* tuple;
1967  dfield_t* field;
1968  byte* buf;
1969 
1970  ut_a(trx_sys_multiple_tablespace_format);
1971 
1972  tuple = dtuple_create(heap, 3);
1973 
1974  /* Store the space id in tuple */
1975 
1976  field = dtuple_get_nth_field(tuple, 0);
1977 
1978  buf = static_cast<byte *>(mem_heap_alloc(heap, 4));
1979 
1980  mach_write_to_4(buf, space);
1981 
1982  dfield_set_data(field, buf, 4);
1983 
1984  /* Store the new format record marker byte */
1985 
1986  field = dtuple_get_nth_field(tuple, 1);
1987 
1988  buf = static_cast<byte *>(mem_heap_alloc(heap, 1));
1989 
1990  mach_write_to_1(buf, 0);
1991 
1992  dfield_set_data(field, buf, 1);
1993 
1994  /* Store the page number in tuple */
1995 
1996  field = dtuple_get_nth_field(tuple, 2);
1997 
1998  buf = static_cast<byte *>(mem_heap_alloc(heap, 4));
1999 
2000  mach_write_to_4(buf, page_no);
2001 
2002  dfield_set_data(field, buf, 4);
2003 
2004  dtuple_set_types_binary(tuple, 3);
2005 
2006  return(tuple);
2007 }
2008 
2009 /*********************************************************************/
2013 UNIV_INLINE
2014 ibool
2015 ibuf_data_enough_free_for_insert(void)
2016 /*==================================*/
2017 {
2018  ut_ad(mutex_own(&ibuf_mutex));
2019 
2020  /* We want a big margin of free pages, because a B-tree can sometimes
2021  grow in size also if records are deleted from it, as the node pointers
2022  can change, and we must make sure that we are able to delete the
2023  inserts buffered for pages that we read to the buffer pool, without
2024  any risk of running out of free space in the insert buffer. */
2025 
2026  return(ibuf->free_list_len >= (ibuf->size / 2) + 3 * ibuf->height);
2027 }
2028 
2029 /*********************************************************************/
2033 UNIV_INLINE
2034 ibool
2035 ibuf_data_too_much_free(void)
2036 /*=========================*/
2037 {
2038  ut_ad(mutex_own(&ibuf_mutex));
2039 
2040  return(ibuf->free_list_len >= 3 + (ibuf->size / 2) + 3 * ibuf->height);
2041 }
2042 
2043 /*********************************************************************/
2047 static
2048 ibool
2049 ibuf_add_free_page(void)
2050 /*====================*/
2051 {
2052  mtr_t mtr;
2053  page_t* header_page;
2054  ulint flags;
2055  ulint zip_size;
2056  ulint page_no;
2057  page_t* page;
2058  page_t* root;
2059  page_t* bitmap_page;
2060 
2061  mtr_start(&mtr);
2062 
2063  /* Acquire the fsp latch before the ibuf header, obeying the latching
2064  order */
2065  mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
2066  zip_size = dict_table_flags_to_zip_size(flags);
2067 
2068  header_page = ibuf_header_page_get(&mtr);
2069 
2070  /* Allocate a new page: NOTE that if the page has been a part of a
2071  non-clustered index which has subsequently been dropped, then the
2072  page may have buffered inserts in the insert buffer, and these
2073  should be deleted from there. These get deleted when the page
2074  allocation creates the page in buffer. Thus the call below may end
2075  up calling the insert buffer routines and, as we yet have no latches
2076  to insert buffer tree pages, these routines can run without a risk
2077  of a deadlock. This is the reason why we created a special ibuf
2078  header page apart from the ibuf tree. */
2079 
2080  page_no = fseg_alloc_free_page(
2081  header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, 0, FSP_UP,
2082  &mtr);
2083 
2084  if (UNIV_UNLIKELY(page_no == FIL_NULL)) {
2085  mtr_commit(&mtr);
2086 
2087  return(FALSE);
2088  }
2089 
2090  {
2091  buf_block_t* block;
2092 
2093  block = buf_page_get(
2094  IBUF_SPACE_ID, 0, page_no, RW_X_LATCH, &mtr);
2095 
2096  buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
2097 
2098 
2099  page = buf_block_get_frame(block);
2100  }
2101 
2102  ibuf_enter();
2103 
2104  mutex_enter(&ibuf_mutex);
2105 
2106  root = ibuf_tree_root_get(&mtr);
2107 
2108  /* Add the page to the free list and update the ibuf size data */
2109 
2110  flst_add_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
2111  page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
2112 
2114  MLOG_2BYTES, &mtr);
2115 
2116  ibuf->seg_size++;
2117  ibuf->free_list_len++;
2118 
2119  /* Set the bit indicating that this page is now an ibuf tree page
2120  (level 2 page) */
2121 
2122  bitmap_page = ibuf_bitmap_get_map_page(
2123  IBUF_SPACE_ID, page_no, zip_size, &mtr);
2124 
2125  mutex_exit(&ibuf_mutex);
2126 
2127  ibuf_bitmap_page_set_bits(
2128  bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, TRUE, &mtr);
2129 
2130  mtr_commit(&mtr);
2131 
2132  ibuf_exit();
2133 
2134  return(TRUE);
2135 }
2136 
/*********************************************************************/
/** Removes the last page from the insert buffer free list and frees it
back to the file segment, provided the free list is longer than needed
(ibuf_data_too_much_free()).  Pessimistic ibuf inserts are blocked for
the duration so the page cannot be consumed concurrently. */
static
void
ibuf_remove_free_page(void)
/*=======================*/
{
	mtr_t	mtr;
	mtr_t	mtr2;
	page_t*	header_page;
	ulint	flags;
	ulint	zip_size;
	ulint	page_no;
	page_t*	page;
	page_t*	root;
	page_t*	bitmap_page;

	mtr_start(&mtr);

	/* Acquire the fsp latch before the ibuf header, obeying the latching
	order */
	mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
	zip_size = dict_table_flags_to_zip_size(flags);

	header_page = ibuf_header_page_get(&mtr);

	/* Prevent pessimistic inserts to insert buffer trees for a while */
	ibuf_enter();
	mutex_enter(&ibuf_pessimistic_insert_mutex);
	mutex_enter(&ibuf_mutex);

	if (!ibuf_data_too_much_free()) {
		/* Nothing to shrink: release everything in reverse
		acquisition order and bail out. */

		mutex_exit(&ibuf_mutex);
		mutex_exit(&ibuf_pessimistic_insert_mutex);

		ibuf_exit();

		mtr_commit(&mtr);

		return;
	}

	/* Use a separate mini-transaction to peek at the free list so
	the root latch can be released before fseg_free_page(). */
	mtr_start(&mtr2);

	root = ibuf_tree_root_get(&mtr2);

	mutex_exit(&ibuf_mutex);

	page_no = flst_get_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
				&mtr2).page;

	/* NOTE that we must release the latch on the ibuf tree root
	because in fseg_free_page we access level 1 pages, and the root
	is a level 2 page. */

	mtr_commit(&mtr2);

	ibuf_exit();

	/* Since pessimistic inserts were prevented, we know that the
	page is still in the free list. NOTE that also deletes may take
	pages from the free list, but they take them from the start, and
	the free list was so long that they cannot have taken the last
	page from it. */

	fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
		       IBUF_SPACE_ID, page_no, &mtr);

#ifdef UNIV_DEBUG_FILE_ACCESSES
	buf_page_reset_file_page_was_freed(IBUF_SPACE_ID, page_no);
#endif

	ibuf_enter();

	mutex_enter(&ibuf_mutex);

	root = ibuf_tree_root_get(&mtr);

	/* The freed page must still be the last one on the free list. */
	ut_ad(page_no == flst_get_last(root + PAGE_HEADER
				       + PAGE_BTR_IBUF_FREE_LIST, &mtr).page);

	{
		buf_block_t*	block;

		block = buf_page_get(
			IBUF_SPACE_ID, 0, page_no, RW_X_LATCH, &mtr);

		buf_block_dbg_add_level(block, SYNC_TREE_NODE);

		page = buf_block_get_frame(block);
	}

	/* Remove the page from the free list and update the ibuf size data */

	flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
		    page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);

	mutex_exit(&ibuf_pessimistic_insert_mutex);

	ibuf->seg_size--;
	ibuf->free_list_len--;

	/* Set the bit indicating that this page is no more an ibuf tree page
	(level 2 page) */

	bitmap_page = ibuf_bitmap_get_map_page(
		IBUF_SPACE_ID, page_no, zip_size, &mtr);

	mutex_exit(&ibuf_mutex);

	ibuf_bitmap_page_set_bits(
		bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, FALSE, &mtr);

#ifdef UNIV_DEBUG_FILE_ACCESSES
	buf_page_set_file_page_was_freed(IBUF_SPACE_ID, page_no);
#endif
	mtr_commit(&mtr);

	ibuf_exit();
}
2259 
2260 /***********************************************************************/
2264 UNIV_INTERN
2265 void
2266 ibuf_free_excess_pages(void)
2267 /*========================*/
2268 {
2269  ulint i;
2270 
2271 #ifdef UNIV_SYNC_DEBUG
2272  ut_ad(rw_lock_own(fil_space_get_latch(IBUF_SPACE_ID, NULL),
2273  RW_LOCK_EX));
2274 #endif /* UNIV_SYNC_DEBUG */
2275 
2277  fil_space_get_latch(IBUF_SPACE_ID, NULL)) == 1);
2278 
2279  ut_ad(!ibuf_inside());
2280 
2281  /* NOTE: We require that the thread did not own the latch before,
2282  because then we know that we can obey the correct latching order
2283  for ibuf latches */
2284 
2285  if (!ibuf) {
2286  /* Not yet initialized; not sure if this is possible, but
2287  does no harm to check for it. */
2288 
2289  return;
2290  }
2291 
2292  /* Free at most a few pages at a time, so that we do not delay the
2293  requested service too much */
2294 
2295  for (i = 0; i < 4; i++) {
2296 
2297  ibool too_much_free;
2298 
2299  mutex_enter(&ibuf_mutex);
2300  too_much_free = ibuf_data_too_much_free();
2301  mutex_exit(&ibuf_mutex);
2302 
2303  if (!too_much_free) {
2304  return;
2305  }
2306 
2307  ibuf_remove_free_page();
2308  }
2309 }
2310 
/*********************************************************************/
/** Collects, around the position of the given insert buffer record, the
(space id, page number) pairs whose buffered changes should be merged,
together with the tablespace version at the time of the decision.
Scans backwards to the start of the record's merge area, then forwards,
accumulating per-page buffered volume and recording a page when it is
the starting page, when contract is requested, or when its volume
exceeds a threshold.
@return	sum of the buffered record volumes picked for merging */
static
ulint
ibuf_get_merge_page_nos(
/*====================*/
	ibool		contract,	/*!< in: TRUE if pages should be
					picked regardless of volume */
	rec_t*		rec,		/*!< in: ibuf record around which
					to collect pages */
	ulint*		space_ids,	/*!< out: space ids, capacity
					IBUF_MAX_N_PAGES_MERGED */
	ib_int64_t*	space_versions,	/*!< out: tablespace versions
					matching space_ids */
	ulint*		page_nos,	/*!< out: page numbers matching
					space_ids */
	ulint*		n_stored)	/*!< out: number of pairs stored */
{
	ulint	prev_page_no;
	ulint	prev_space_id;
	ulint	first_page_no;
	ulint	first_space_id;
	ulint	rec_page_no;
	ulint	rec_space_id;
	ulint	sum_volumes;
	ulint	volume_for_page;
	ulint	rec_volume;
	ulint	limit;
	ulint	n_pages;

	*n_stored = 0;

	/* Never merge more pages than a quarter of the buffer pool. */
	limit = ut_min(IBUF_MAX_N_PAGES_MERGED, buf_pool_get_curr_size() / 4);

	if (page_rec_is_supremum(rec)) {

		rec = page_rec_get_prev(rec);
	}

	if (page_rec_is_infimum(rec)) {

		rec = page_rec_get_next(rec);
	}

	if (page_rec_is_supremum(rec)) {
		/* The page holds no user records at all. */

		return(0);
	}

	first_page_no = ibuf_rec_get_page_no(rec);
	first_space_id = ibuf_rec_get_space(rec);
	n_pages = 0;
	prev_page_no = 0;
	prev_space_id = 0;

	/* Go backwards from the first rec until we reach the border of the
	'merge area', or the page start or the limit of storeable pages is
	reached */

	while (!page_rec_is_infimum(rec) && UNIV_LIKELY(n_pages < limit)) {

		rec_page_no = ibuf_rec_get_page_no(rec);
		rec_space_id = ibuf_rec_get_space(rec);

		if (rec_space_id != first_space_id
		    || (rec_page_no / IBUF_MERGE_AREA)
		    != (first_page_no / IBUF_MERGE_AREA)) {

			break;
		}

		if (rec_page_no != prev_page_no
		    || rec_space_id != prev_space_id) {
			n_pages++;
		}

		prev_page_no = rec_page_no;
		prev_space_id = rec_space_id;

		rec = page_rec_get_prev(rec);
	}

	/* Step back onto the first record inside the merge area. */
	rec = page_rec_get_next(rec);

	/* At the loop start there is no prev page; we mark this with a pair
	of space id, page no (0, 0) for which there can never be entries in
	the insert buffer */

	prev_page_no = 0;
	prev_space_id = 0;
	sum_volumes = 0;
	volume_for_page = 0;

	while (*n_stored < limit) {
		if (page_rec_is_supremum(rec)) {
			/* When no more records available, mark this with
			another 'impossible' pair of space id, page no */
			rec_page_no = 1;
			rec_space_id = 0;
		} else {
			rec_page_no = ibuf_rec_get_page_no(rec);
			rec_space_id = ibuf_rec_get_space(rec);
			ut_ad(rec_page_no > IBUF_TREE_ROOT_PAGE_NO);
		}

#ifdef UNIV_IBUF_DEBUG
		ut_a(*n_stored < IBUF_MAX_N_PAGES_MERGED);
#endif
		/* Crossing a page boundary: decide whether the page we
		just finished accumulating should be merged. */
		if ((rec_space_id != prev_space_id
		     || rec_page_no != prev_page_no)
		    && (prev_space_id != 0 || prev_page_no != 0)) {

			if ((prev_page_no == first_page_no
			     && prev_space_id == first_space_id)
			    || contract
			    || (volume_for_page
				> ((IBUF_MERGE_THRESHOLD - 1)
				   * 4 * UNIV_PAGE_SIZE
				   / IBUF_PAGE_SIZE_PER_FREE_SPACE)
				/ IBUF_MERGE_THRESHOLD)) {

				space_ids[*n_stored] = prev_space_id;
				space_versions[*n_stored]
					= fil_space_get_version(prev_space_id);
				page_nos[*n_stored] = prev_page_no;

				(*n_stored)++;

				sum_volumes += volume_for_page;
			}

			if (rec_space_id != first_space_id
			    || rec_page_no / IBUF_MERGE_AREA
			    != first_page_no / IBUF_MERGE_AREA) {
				/* Left the merge area: stop scanning. */

				break;
			}

			volume_for_page = 0;
		}

		if (rec_page_no == 1 && rec_space_id == 0) {
			/* Supremum record */

			break;
		}

		rec_volume = ibuf_rec_get_volume(rec);

		volume_for_page += rec_volume;

		prev_page_no = rec_page_no;
		prev_space_id = rec_space_id;

		rec = page_rec_get_next(rec);
	}

#ifdef UNIV_IBUF_DEBUG
	ut_a(*n_stored <= IBUF_MAX_N_PAGES_MERGED);
#endif
#if 0
	fprintf(stderr, "Ibuf merge batch %lu pages %lu volume\n",
		*n_stored, sum_volumes);
#endif
	return(sum_volumes);
}
2483 
2484 /*********************************************************************/
2489 static
2490 ulint
2491 ibuf_contract_ext(
2492 /*==============*/
2493  ulint* n_pages,
2494  ibool sync)
2497 {
2498  btr_pcur_t pcur;
2499  ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
2500  ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
2501  ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
2502  ulint sum_sizes;
2503  mtr_t mtr;
2504 
2505  *n_pages = 0;
2506  ut_ad(!ibuf_inside());
2507 
2508  /* We perform a dirty read of ibuf->empty, without latching
2509  the insert buffer root page. We trust this dirty read except
2510  when a slow shutdown is being executed. During a slow
2511  shutdown, the insert buffer merge must be completed. */
2512 
2513  if (UNIV_UNLIKELY(ibuf->empty)
2514  && UNIV_LIKELY(!srv_shutdown_state)) {
2516 
2517 #if 0 /* TODO */
2518  if (srv_shutdown_state) {
2519  /* If the insert buffer becomes empty during
2520  shutdown, note it in the system tablespace. */
2521 
2522  trx_sys_set_ibuf_format(TRX_SYS_IBUF_EMPTY);
2523  }
2524 
2525  /* TO DO: call trx_sys_set_ibuf_format() at startup
2526  and whenever ibuf_use is changed to allow buffered
2527  delete-marking or deleting. Never downgrade the
2528  stamped format except when the insert buffer becomes
2529  empty. */
2530 #endif
2531 
2532  return(0);
2533  }
2534 
2535  mtr_start(&mtr);
2536 
2537  ibuf_enter();
2538 
2539  /* Open a cursor to a randomly chosen leaf of the tree, at a random
2540  position within the leaf */
2541 
2542  btr_pcur_open_at_rnd_pos(ibuf->index, BTR_SEARCH_LEAF, &pcur, &mtr);
2543 
2544  ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
2545 
2546  if (page_get_n_recs(btr_pcur_get_page(&pcur)) == 0) {
2547  /* If a B-tree page is empty, it must be the root page
2548  and the whole B-tree must be empty. InnoDB does not
2549  allow empty B-tree pages other than the root. */
2550  ut_ad(ibuf->empty);
2552  == IBUF_SPACE_ID);
2554  == FSP_IBUF_TREE_ROOT_PAGE_NO);
2555 
2556  ibuf_exit();
2557 
2558  mtr_commit(&mtr);
2559  btr_pcur_close(&pcur);
2560 
2561  goto ibuf_is_empty;
2562  }
2563 
2564  sum_sizes = ibuf_get_merge_page_nos(TRUE, btr_pcur_get_rec(&pcur),
2565  space_ids, space_versions,
2566  page_nos, n_pages);
2567 #if 0 /* defined UNIV_IBUF_DEBUG */
2568  fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n",
2569  sync, *n_pages, sum_sizes);
2570 #endif
2571  ibuf_exit();
2572 
2573  mtr_commit(&mtr);
2574  btr_pcur_close(&pcur);
2575 
2576  buf_read_ibuf_merge_pages(sync, space_ids, space_versions, page_nos,
2577  *n_pages);
2578 
2579  return(sum_sizes + 1);
2580 }
2581 
2582 /*********************************************************************/
2587 UNIV_INTERN
2588 ulint
2589 ibuf_contract(
2590 /*==========*/
2591  ibool sync)
2594 {
2595  ulint n_pages;
2596 
2597  return(ibuf_contract_ext(&n_pages, sync));
2598 }
2599 
2600 /*********************************************************************/
2605 UNIV_INTERN
2606 ulint
2607 ibuf_contract_for_n_pages(
2608 /*======================*/
2609  ibool sync,
2612  ulint n_pages)
2615 {
2616  ulint sum_bytes = 0;
2617  ulint sum_pages = 0;
2618  ulint n_bytes;
2619  ulint n_pag2;
2620 
2621  if (srv_fake_write)
2622  return(0);
2623 
2624  while (sum_pages < n_pages) {
2625  n_bytes = ibuf_contract_ext(&n_pag2, sync);
2626 
2627  if (n_bytes == 0) {
2628  return(sum_bytes);
2629  }
2630 
2631  sum_bytes += n_bytes;
2632  sum_pages += n_pag2;
2633  }
2634 
2635  return(sum_bytes);
2636 }
2637 
2638 /*********************************************************************/
2640 UNIV_INLINE
2641 void
2642 ibuf_contract_after_insert(
2643 /*=======================*/
2644  ulint entry_size)
2646 {
2647  ibool sync;
2648  ulint sum_sizes;
2649  ulint size;
2650  ulint max_size;
2651 
2652  /* Perform dirty reads of ibuf->size and ibuf->max_size, to
2653  reduce ibuf_mutex contention. ibuf->max_size remains constant
2654  after ibuf_init_at_db_start(), but ibuf->size should be
2655  protected by ibuf_mutex. Given that ibuf->size fits in a
2656  machine word, this should be OK; at worst we are doing some
2657  excessive ibuf_contract() or occasionally skipping a
2658  ibuf_contract(). */
2659  size = ibuf->size;
2660  max_size = ibuf->max_size;
2661 
2662  if (srv_ibuf_active_contract == false
2663  && size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
2664  return;
2665  }
2666 
2667  sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC);
2668 
2669  /* Contract at least entry_size many bytes */
2670  sum_sizes = 0;
2671  size = 1;
2672 
2673  do {
2674 
2675  size = ibuf_contract(sync);
2676  sum_sizes += size;
2677  } while (size > 0 && sum_sizes < entry_size);
2678 }
2679 
2680 /*********************************************************************/
2683 static
2684 ibool
2685 ibuf_get_volume_buffered_hash(
2686 /*==========================*/
2687  const rec_t* rec,
2688  const byte* types,
2689  const byte* data,
2690  ulint comp,
2692  ulint* hash,
2693  ulint size)
2694 {
2695  ulint len;
2696  ulint fold;
2697  ulint bitmask;
2698 
2699  len = ibuf_rec_get_size(rec, types, rec_get_n_fields_old(rec) - 4,
2700  FALSE, comp);
2701  fold = ut_fold_binary(data, len);
2702 
2703  hash += (fold / (8 * sizeof *hash)) % size; // 8 = bits in byte
2704  bitmask = 1 << (fold % (8 * sizeof *hash));
2705 
2706  if (*hash & bitmask) {
2707 
2708  return(FALSE);
2709  }
2710 
2711  /* We have not seen this record yet. Insert it. */
2712  *hash |= bitmask;
2713 
2714  return(TRUE);
2715 }
2716 
2717 /*********************************************************************/
2722 static
2723 ulint
2724 ibuf_get_volume_buffered_count(
2725 /*===========================*/
2726  const rec_t* rec,
2727  ulint* hash,
2728  ulint size,
2729  lint* n_recs)
2731 {
2732  ulint len;
2733  ibuf_op_t ibuf_op;
2734  const byte* types;
2735  ulint n_fields = rec_get_n_fields_old(rec);
2736 
2737  ut_ad(ibuf_inside());
2738  ut_ad(n_fields > 4);
2739  n_fields -= 4;
2740 
2741  rec_get_nth_field_offs_old(rec, 1, &len);
2742  /* This function is only invoked when buffering new
2743  operations. All pre-4.1 records should have been merged
2744  when the database was started up. */
2745  ut_a(len == 1);
2746  ut_ad(trx_sys_multiple_tablespace_format);
2747 
2748  types = rec_get_nth_field_old(rec, 3, &len);
2749 
2750  switch (UNIV_EXPECT(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE,
2751  IBUF_REC_INFO_SIZE)) {
2752  default:
2753  ut_error;
2754  case 0:
2755  /* This ROW_TYPE=REDUNDANT record does not include an
2756  operation counter. Exclude it from the *n_recs,
2757  because deletes cannot be buffered if there are
2758  old-style inserts buffered for the page. */
2759 
2760  len = ibuf_rec_get_size(rec, types, n_fields, FALSE, 0);
2761 
2762  return(len
2763  + rec_get_converted_extra_size(len, n_fields, 0)
2765  case 1:
2766  /* This ROW_TYPE=COMPACT record does not include an
2767  operation counter. Exclude it from the *n_recs,
2768  because deletes cannot be buffered if there are
2769  old-style inserts buffered for the page. */
2770  goto get_volume_comp;
2771 
2772  case IBUF_REC_INFO_SIZE:
2773  ibuf_op = (ibuf_op_t) types[IBUF_REC_OFFSET_TYPE];
2774  break;
2775  }
2776 
2777  switch (ibuf_op) {
2778  case IBUF_OP_INSERT:
2779  /* Inserts can be done by updating a delete-marked record.
2780  Because delete-mark and insert operations can be pointing to
2781  the same records, we must not count duplicates. */
2782  case IBUF_OP_DELETE_MARK:
2783  /* There must be a record to delete-mark.
2784  See if this record has been already buffered. */
2785  if (n_recs && ibuf_get_volume_buffered_hash(
2786  rec, types + IBUF_REC_INFO_SIZE,
2787  types + len,
2788  types[IBUF_REC_OFFSET_FLAGS] & IBUF_REC_COMPACT,
2789  hash, size)) {
2790  (*n_recs)++;
2791  }
2792 
2793  if (ibuf_op == IBUF_OP_DELETE_MARK) {
2794  /* Setting the delete-mark flag does not
2795  affect the available space on the page. */
2796  return(0);
2797  }
2798  break;
2799  case IBUF_OP_DELETE:
2800  /* A record will be removed from the page. */
2801  if (n_recs) {
2802  (*n_recs)--;
2803  }
2804  /* While deleting a record actually frees up space,
2805  we have to play it safe and pretend that it takes no
2806  additional space (the record might not exist, etc.). */
2807  return(0);
2808  default:
2809  ut_error;
2810  }
2811 
2812  ut_ad(ibuf_op == IBUF_OP_INSERT);
2813 
2814 get_volume_comp:
2815  {
2816  dtuple_t* entry;
2817  ulint volume;
2818  dict_index_t* dummy_index;
2819  mem_heap_t* heap = mem_heap_create(500);
2820 
2821  entry = ibuf_build_entry_from_ibuf_rec(
2822  rec, heap, &dummy_index);
2823 
2824  volume = rec_get_converted_size(dummy_index, entry, 0);
2825 
2826  ibuf_dummy_index_free(dummy_index);
2827  mem_heap_free(heap);
2828 
2829  return(volume + page_dir_calc_reserved_space(1));
2830  }
2831 }
2832 
/*********************************************************************//**
Gets an upper limit for the combined size of entries buffered in the
insert buffer for a given index page.  Scans the ibuf records for
(space, page_no) around the cursor position, on the cursor's page and
on the adjacent ibuf tree pages that can be latched safely.
@return	upper limit for the volume of buffered inserts for the index
page, in bytes; UNIV_PAGE_SIZE if the entries for the index page span
several pages in the insert buffer and the limit cannot be computed */
static
ulint
ibuf_get_volume_buffered(
/*=====================*/
	btr_pcur_t*	pcur,	/*!< in: pcur positioned at a place in an
				insert buffer tree where we would insert an
				entry for the index page whose number is
				page_no; latch mode must be BTR_MODIFY_PREV
				or BTR_MODIFY_TREE */
	ulint		space,	/*!< in: tablespace id of the index page */
	ulint		page_no,/*!< in: page number of the index page */
	lint*		n_recs,	/*!< in/out: estimate of the number of
				records on the index page, or NULL */
	mtr_t*		mtr)	/*!< in: mini-transaction holding pcur's
				latches */
{
	ulint	volume;
	rec_t*	rec;
	page_t*	page;
	ulint	prev_page_no;
	page_t*	prev_page;
	ulint	next_page_no;
	page_t*	next_page;
	ulint	hash_bitmap[128 / sizeof(ulint)]; /* bitmap of buffered recs */

	ut_a(trx_sys_multiple_tablespace_format);

	ut_ad((pcur->latch_mode == BTR_MODIFY_PREV)
	      || (pcur->latch_mode == BTR_MODIFY_TREE));

	/* Count the volume of inserts earlier in the alphabetical order than
	pcur */

	volume = 0;

	if (n_recs) {
		memset(hash_bitmap, 0, sizeof hash_bitmap);
	}

	rec = btr_pcur_get_rec(pcur);
	page = page_align(rec);
	ut_ad(page_validate(page, ibuf->index));

	if (page_rec_is_supremum(rec)) {
		rec = page_rec_get_prev(rec);
	}

	/* Scan backwards on the cursor's own page. */
	for (;;) {
		if (page_rec_is_infimum(rec)) {

			break;
		}

		if (page_no != ibuf_rec_get_page_no(rec)
		    || space != ibuf_rec_get_space(rec)) {

			goto count_later;
		}

		volume += ibuf_get_volume_buffered_count(
			rec, hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);

		rec = page_rec_get_prev(rec);
		ut_ad(page_align(rec) == page);
	}

	/* Look at the previous page */

	prev_page_no = btr_page_get_prev(page, mtr);

	if (prev_page_no == FIL_NULL) {

		goto count_later;
	}

	{
		buf_block_t*	block;

		block = buf_page_get(
			IBUF_SPACE_ID, 0, prev_page_no, RW_X_LATCH, mtr);

		buf_block_dbg_add_level(block, SYNC_TREE_NODE);


		prev_page = buf_block_get_frame(block);
		ut_ad(page_validate(prev_page, ibuf->index));
	}

#ifdef UNIV_BTR_DEBUG
	ut_a(btr_page_get_next(prev_page, mtr)
	     == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */

	rec = page_get_supremum_rec(prev_page);
	rec = page_rec_get_prev(rec);

	/* Scan backwards on the previous ibuf tree page. */
	for (;;) {
		if (page_rec_is_infimum(rec)) {

			/* We cannot go to yet a previous page, because we
			do not have the x-latch on it, and cannot acquire one
			because of the latching order: we have to give up */

			return(UNIV_PAGE_SIZE);
		}

		if (page_no != ibuf_rec_get_page_no(rec)
		    || space != ibuf_rec_get_space(rec)) {

			goto count_later;
		}

		volume += ibuf_get_volume_buffered_count(
			rec, hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);

		rec = page_rec_get_prev(rec);
		ut_ad(page_align(rec) == prev_page);
	}

count_later:
	/* Count the volume of inserts later in the alphabetical order
	than pcur. */
	rec = btr_pcur_get_rec(pcur);

	if (!page_rec_is_supremum(rec)) {
		rec = page_rec_get_next(rec);
	}

	for (;;) {
		if (page_rec_is_supremum(rec)) {

			break;
		}

		if (page_no != ibuf_rec_get_page_no(rec)
		    || space != ibuf_rec_get_space(rec)) {

			return(volume);
		}

		volume += ibuf_get_volume_buffered_count(
			rec, hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);

		rec = page_rec_get_next(rec);
	}

	/* Look at the next page */

	next_page_no = btr_page_get_next(page, mtr);

	if (next_page_no == FIL_NULL) {

		return(volume);
	}

	{
		buf_block_t*	block;

		block = buf_page_get(
			IBUF_SPACE_ID, 0, next_page_no, RW_X_LATCH, mtr);

		buf_block_dbg_add_level(block, SYNC_TREE_NODE);


		next_page = buf_block_get_frame(block);
		ut_ad(page_validate(next_page, ibuf->index));
	}

#ifdef UNIV_BTR_DEBUG
	ut_a(btr_page_get_prev(next_page, mtr) == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */

	rec = page_get_infimum_rec(next_page);
	rec = page_rec_get_next(rec);

	/* Scan forwards on the next ibuf tree page. */
	for (;;) {
		if (page_rec_is_supremum(rec)) {

			/* We give up */

			return(UNIV_PAGE_SIZE);
		}

		if (page_no != ibuf_rec_get_page_no(rec)
		    || space != ibuf_rec_get_space(rec)) {

			return(volume);
		}

		volume += ibuf_get_volume_buffered_count(
			rec, hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);

		rec = page_rec_get_next(rec);
		ut_ad(page_align(rec) == next_page);
	}
}
3033 
3034 /*********************************************************************/
3037 UNIV_INTERN
3038 void
3039 ibuf_update_max_tablespace_id(void)
3040 /*===============================*/
3041 {
3042  ulint max_space_id;
3043  const rec_t* rec;
3044  const byte* field;
3045  ulint len;
3046  btr_pcur_t pcur;
3047  mtr_t mtr;
3048 
3049  ut_a(!dict_table_is_comp(ibuf->index->table));
3050 
3051  ibuf_enter();
3052 
3053  mtr_start(&mtr);
3054 
3056  FALSE, ibuf->index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
3057 
3058  ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
3059 
3060  btr_pcur_move_to_prev(&pcur, &mtr);
3061 
3062  if (btr_pcur_is_before_first_on_page(&pcur)) {
3063  /* The tree is empty */
3064 
3065  max_space_id = 0;
3066  } else {
3067  rec = btr_pcur_get_rec(&pcur);
3068 
3069  field = rec_get_nth_field_old(rec, 0, &len);
3070 
3071  ut_a(len == 4);
3072 
3073  max_space_id = mach_read_from_4(field);
3074  }
3075 
3076  mtr_commit(&mtr);
3077  ibuf_exit();
3078 
3079  /* printf("Maximum space id in insert buffer %lu\n", max_space_id); */
3080 
3081  fil_set_max_space_id_if_bigger(max_space_id);
3082 }
3083 
3084 /****************************************************************/
3089 static
3090 ulint
3091 ibuf_get_entry_counter_low(
3092 /*=======================*/
3093  const rec_t* rec,
3094  ulint space,
3095  ulint page_no)
3096 {
3097  ulint counter;
3098  const byte* field;
3099  ulint len;
3100 
3101  ut_ad(ibuf_inside());
3102  ut_ad(rec_get_n_fields_old(rec) > 2);
3103 
3104  field = rec_get_nth_field_old(rec, 1, &len);
3105 
3106  if (UNIV_UNLIKELY(len != 1)) {
3107  /* pre-4.1 format */
3108  ut_a(trx_doublewrite_must_reset_space_ids);
3109  ut_a(!trx_sys_multiple_tablespace_format);
3110 
3111  return(ULINT_UNDEFINED);
3112  }
3113 
3114  ut_a(trx_sys_multiple_tablespace_format);
3115 
3116  /* Check the tablespace identifier. */
3117  field = rec_get_nth_field_old(rec, 0, &len);
3118  ut_a(len == 4);
3119 
3120  if (mach_read_from_4(field) != space) {
3121 
3122  return(0);
3123  }
3124 
3125  /* Check the page offset. */
3126  field = rec_get_nth_field_old(rec, 2, &len);
3127  ut_a(len == 4);
3128 
3129  if (mach_read_from_4(field) != page_no) {
3130 
3131  return(0);
3132  }
3133 
3134  /* Check if the record contains a counter field. */
3135  field = rec_get_nth_field_old(rec, 3, &len);
3136 
3137  switch (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) {
3138  default:
3139  ut_error;
3140  case 0: /* ROW_FORMAT=REDUNDANT */
3141  case 1: /* ROW_FORMAT=COMPACT */
3142  return(ULINT_UNDEFINED);
3143 
3144  case IBUF_REC_INFO_SIZE:
3145  counter = mach_read_from_2(field + IBUF_REC_OFFSET_COUNTER);
3146  ut_a(counter < 0xFFFF);
3147  return(counter + 1);
3148  }
3149 }
3150 
3151 /****************************************************************/
3155 static
3156 ibool
3157 ibuf_set_entry_counter(
3158 /*===================*/
3159  dtuple_t* entry,
3160  ulint space,
3161  ulint page_no,
3162  btr_pcur_t* pcur,
3165  ibool is_optimistic,
3166  mtr_t* mtr)
3167 {
3168  dfield_t* field;
3169  byte* data;
3170  ulint counter = 0;
3171 
3172  /* pcur points to either a user rec or to a page's infimum record. */
3173  ut_ad(page_validate(btr_pcur_get_page(pcur), ibuf->index));
3174 
3175  if (btr_pcur_is_on_user_rec(pcur)) {
3176 
3177  counter = ibuf_get_entry_counter_low(
3178  btr_pcur_get_rec(pcur), space, page_no);
3179 
3180  if (UNIV_UNLIKELY(counter == ULINT_UNDEFINED)) {
3181  /* The record lacks a counter field.
3182  Such old records must be merged before
3183  new records can be buffered. */
3184 
3185  return(FALSE);
3186  }
3187  } else if (btr_pcur_is_before_first_in_tree(pcur, mtr)) {
3188  /* Ibuf tree is either completely empty, or the insert
3189  position is at the very first record of a non-empty tree. In
3190  either case we have no previous records for (space,
3191  page_no). */
3192 
3193  counter = 0;
3194  } else if (btr_pcur_is_before_first_on_page(pcur)) {
3195  btr_cur_t* cursor = btr_pcur_get_btr_cur(pcur);
3196 
3197  if (cursor->low_match < 3) {
3198  /* If low_match < 3, we know that the father node
3199  pointer did not contain the searched for (space,
3200  page_no), which means that the search ended on the
3201  right page regardless of the counter value, and
3202  since we're at the infimum record, there are no
3203  existing records. */
3204 
3205  counter = 0;
3206  } else {
3207  rec_t* rec;
3208  const page_t* page;
3209  buf_block_t* block;
3210  page_t* prev_page;
3211  ulint prev_page_no;
3212 
3213  ut_a(cursor->ibuf_cnt != ULINT_UNDEFINED);
3214 
3215  page = btr_pcur_get_page(pcur);
3216  prev_page_no = btr_page_get_prev(page, mtr);
3217 
3218  ut_a(prev_page_no != FIL_NULL);
3219 
3220  block = buf_page_get(
3221  IBUF_SPACE_ID, 0, prev_page_no,
3222  RW_X_LATCH, mtr);
3223 
3224  buf_block_dbg_add_level(block, SYNC_TREE_NODE);
3225 
3226  prev_page = buf_block_get_frame(block);
3227 
3228  rec = page_rec_get_prev(
3229  page_get_supremum_rec(prev_page));
3230 
3232 
3233  counter = ibuf_get_entry_counter_low(
3234  rec, space, page_no);
3235 
3236  if (UNIV_UNLIKELY(counter == ULINT_UNDEFINED)) {
3237  /* The record lacks a counter field.
3238  Such old records must be merged before
3239  new records can be buffered. */
3240 
3241  return(FALSE);
3242  }
3243 
3244  if (counter < cursor->ibuf_cnt) {
3245  /* Search ended on the wrong page. */
3246 
3247  if (is_optimistic) {
3248  /* In an optimistic insert, we can
3249  shift the insert position to the left
3250  page, since it only needs an X-latch
3251  on the page itself, which the
3252  original search acquired for us. */
3253 
3255  ibuf->index, rec, block,
3256  btr_pcur_get_btr_cur(pcur));
3257  } else {
3258  /* We can't shift the insert
3259  position to the left page in a
3260  pessimistic insert since it would
3261  require an X-latch on the left
3262  page's left page, so we have to
3263  abort. */
3264 
3265  return(FALSE);
3266  }
3267  } else {
3268  /* The counter field in the father node is
3269  the same as we would insert; we don't know
3270  whether the insert should go to this page or
3271  the left page (the later fields can differ),
3272  so refuse the insert. */
3273 
3274  return(FALSE);
3275  }
3276  }
3277  } else {
3278  /* The cursor is not positioned at or before a user record. */
3279  return(FALSE);
3280  }
3281 
3282  /* Patch counter value in already built entry. */
3283  field = dtuple_get_nth_field(entry, 3);
3284  data = static_cast<byte *>(dfield_get_data(field));
3285 
3286  mach_write_to_2(data + IBUF_REC_OFFSET_COUNTER, counter);
3287 
3288  return(TRUE);
3289 }
3290 
/*********************************************************************//**
Makes an index insert to the insert buffer, instead of directly to the
disk page, if this is possible.
@return	DB_SUCCESS, DB_FAIL (optimistic insert did not fit),
DB_STRONG_FAIL (must not or cannot buffer) */
static
ulint
ibuf_insert_low(
/*============*/
	ulint		mode,	/*!< in: BTR_MODIFY_PREV (optimistic) or
				BTR_MODIFY_TREE (pessimistic) */
	ibuf_op_t	op,	/*!< in: operation type */
	ibool		no_counter,
				/*!< in: TRUE=use the old record format
				without an operation counter */
	const dtuple_t*	entry,	/*!< in: index entry to insert */
	ulint		entry_size,
				/*!< in: rec_get_converted_size(index,
				entry) */
	dict_index_t*	index,	/*!< in: index where to insert; must not
				be clustered */
	ulint		space,	/*!< in: space id where to insert */
	ulint		zip_size,
				/*!< in: compressed page size in bytes,
				or 0 */
	ulint		page_no,/*!< in: page number where to insert */
	que_thr_t*	thr)	/*!< in: query thread */
{
	big_rec_t*	dummy_big_rec;
	btr_pcur_t	pcur;
	btr_cur_t*	cursor;
	dtuple_t*	ibuf_entry;
	mem_heap_t*	heap;
	ulint		buffered;
	lint		min_n_recs;
	rec_t*		ins_rec;
	ibool		old_bit_value;
	page_t*		bitmap_page;
	buf_block_t*	block;
	page_t*		root;
	ulint		err;
	ibool		do_merge;
	ulint		space_ids[IBUF_MAX_N_PAGES_MERGED];
	ib_int64_t	space_versions[IBUF_MAX_N_PAGES_MERGED];
	ulint		page_nos[IBUF_MAX_N_PAGES_MERGED];
	ulint		n_stored;
	mtr_t		mtr;
	mtr_t		bitmap_mtr;

	ut_a(!dict_index_is_clust(index));
	ut_ad(dtuple_check_typed(entry));
	ut_ad(ut_is_2pow(zip_size));
	ut_ad(!no_counter || op == IBUF_OP_INSERT);
	ut_a(op < IBUF_OP_COUNT);

	ut_a(trx_sys_multiple_tablespace_format);

	do_merge = FALSE;

	/* Perform dirty reads of ibuf->size and ibuf->max_size, to
	reduce ibuf_mutex contention. ibuf->max_size remains constant
	after ibuf_init_at_db_start(), but ibuf->size should be
	protected by ibuf_mutex. Given that ibuf->size fits in a
	machine word, this should be OK; at worst we are doing some
	excessive ibuf_contract() or occasionally skipping a
	ibuf_contract(). */
	if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) {
		/* Insert buffer is now too big, contract it but do not try
		to insert */


#ifdef UNIV_IBUF_DEBUG
		fputs("Ibuf too big\n", stderr);
#endif
		/* Use synchronous contract (== TRUE) */
		ibuf_contract(TRUE);

		return(DB_STRONG_FAIL);
	}

	heap = mem_heap_create(512);

	/* Build the entry which contains the space id and the page number
	as the first fields and the type information for other fields, and
	which will be inserted to the insert buffer. Using a counter value
	of 0xFFFF we find the last record for (space, page_no), from which
	we can then read the counter value N and use N + 1 in the record we
	insert. (We patch the ibuf_entry's counter field to the correct
	value just before actually inserting the entry.) */

	ibuf_entry = ibuf_entry_build(
		op, index, entry, space, page_no,
		no_counter ? ULINT_UNDEFINED : 0xFFFF, heap);

	/* Open a cursor to the insert buffer tree to calculate if we can add
	the new entry to it without exceeding the free space limit for the
	page. */

	if (mode == BTR_MODIFY_TREE) {
		/* A pessimistic insert may need to allocate pages:
		make sure there are enough free pages reserved, adding
		more if necessary.  Note the mutex acquisition order:
		pessimistic-insert mutex before ibuf_mutex. */
		for (;;) {
			ibuf_enter();
			mutex_enter(&ibuf_pessimistic_insert_mutex);
			mutex_enter(&ibuf_mutex);

			if (UNIV_LIKELY(ibuf_data_enough_free_for_insert())) {

				break;
			}

			mutex_exit(&ibuf_mutex);
			mutex_exit(&ibuf_pessimistic_insert_mutex);
			ibuf_exit();

			if (UNIV_UNLIKELY(!ibuf_add_free_page())) {

				mem_heap_free(heap);
				return(DB_STRONG_FAIL);
			}
		}
	} else {
		ibuf_enter();
	}

	mtr_start(&mtr);

	btr_pcur_open(ibuf->index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr);
	ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));

	/* Find out the volume of already buffered inserts for the same index
	page */
	min_n_recs = 0;
	buffered = ibuf_get_volume_buffered(&pcur, space, page_no,
					    op == IBUF_OP_DELETE
					    ? &min_n_recs
					    : NULL, &mtr);

	if (op == IBUF_OP_DELETE
	    && (min_n_recs < 2
		|| buf_pool_watch_occurred(space, page_no))) {
		/* The page could become empty after the record is
		deleted, or the page has been read in to the buffer
		pool. Refuse to buffer the operation. */

		/* The buffer pool watch is needed for IBUF_OP_DELETE
		because of latching order considerations. We can
		check buf_pool_watch_occurred() only after latching
		the insert buffer B-tree pages that contain buffered
		changes for the page. We never buffer IBUF_OP_DELETE,
		unless some IBUF_OP_INSERT or IBUF_OP_DELETE_MARK have
		been previously buffered for the page. Because there
		are buffered operations for the page, the insert
		buffer B-tree page latches held by mtr will guarantee
		that no changes for the user page will be merged
		before mtr_commit(&mtr). We must not mtr_commit(&mtr)
		until after the IBUF_OP_DELETE has been buffered. */

fail_exit:
		if (mode == BTR_MODIFY_TREE) {
			mutex_exit(&ibuf_mutex);
			mutex_exit(&ibuf_pessimistic_insert_mutex);
		}

		err = DB_STRONG_FAIL;
		goto func_exit;
	}

	/* After this point, the page could still be loaded to the
	buffer pool, but we do not have to care about it, since we are
	holding a latch on the insert buffer leaf page that contains
	buffered changes for (space, page_no). If the page enters the
	buffer pool, buf_page_io_complete() for (space, page_no) will
	have to acquire a latch on the same insert buffer leaf page,
	which it cannot do until we have buffered the IBUF_OP_DELETE
	and done mtr_commit(&mtr) to release the latch. */

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a((buffered == 0) || ibuf_count_get(space, page_no));
#endif
	mtr_start(&bitmap_mtr);

	bitmap_page = ibuf_bitmap_get_map_page(space, page_no,
					       zip_size, &bitmap_mtr);

	/* We check if the index page is suitable for buffered entries */

	if (buf_page_peek(space, page_no)
	    || lock_rec_expl_exist_on_page(space, page_no)) {

		goto bitmap_fail;
	}

	if (op == IBUF_OP_INSERT) {
		ulint	bits = ibuf_bitmap_page_get_bits(
			bitmap_page, page_no, zip_size, IBUF_BITMAP_FREE,
			&bitmap_mtr);

		if (buffered + entry_size + page_dir_calc_reserved_space(1)
		    > ibuf_index_page_calc_free_from_bits(zip_size, bits)) {
			/* Release the bitmap page latch early. */
			mtr_commit(&bitmap_mtr);

			/* It may not fit */
			do_merge = TRUE;

			ibuf_get_merge_page_nos(
				FALSE, btr_pcur_get_rec(&pcur),
				space_ids, space_versions,
				page_nos, &n_stored);

			goto fail_exit;
		}
	}

	/* Patch correct counter value to the entry to insert. This can
	change the insert position, which can result in the need to abort in
	some cases. */
	if (!no_counter
	    && !ibuf_set_entry_counter(ibuf_entry, space, page_no, &pcur,
				       mode == BTR_MODIFY_PREV, &mtr)) {
bitmap_fail:
		mtr_commit(&bitmap_mtr);

		goto fail_exit;
	}

	/* Set the bitmap bit denoting that the insert buffer contains
	buffered entries for this index page, if the bit is not set yet */

	old_bit_value = ibuf_bitmap_page_get_bits(
		bitmap_page, page_no, zip_size,
		IBUF_BITMAP_BUFFERED, &bitmap_mtr);

	if (!old_bit_value) {
		ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
					  IBUF_BITMAP_BUFFERED, TRUE,
					  &bitmap_mtr);
	}

	mtr_commit(&bitmap_mtr);

	cursor = btr_pcur_get_btr_cur(&pcur);

	if (mode == BTR_MODIFY_PREV) {
		err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor,
						ibuf_entry, &ins_rec,
						&dummy_big_rec, 0, thr, &mtr);
		block = btr_cur_get_block(cursor);
		ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID);

		/* If this is the root page, update ibuf->empty. */
		if (UNIV_UNLIKELY(buf_block_get_page_no(block)
				  == FSP_IBUF_TREE_ROOT_PAGE_NO)) {
			const page_t*	page_root = buf_block_get_frame(block);

			ut_ad(page_get_space_id(page_root) == IBUF_SPACE_ID);
			ut_ad(page_get_page_no(page_root)
			      == FSP_IBUF_TREE_ROOT_PAGE_NO);

			ibuf->empty = (page_get_n_recs(page_root) == 0);
		}
	} else {
		ut_ad(mode == BTR_MODIFY_TREE);

		/* We acquire an x-latch to the root page before the insert,
		because a pessimistic insert releases the tree x-latch,
		which would cause the x-latching of the root after that to
		break the latching order. */

		root = ibuf_tree_root_get(&mtr);

		err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
						 | BTR_NO_UNDO_LOG_FLAG,
						 cursor,
						 ibuf_entry, &ins_rec,
						 &dummy_big_rec, 0, thr, &mtr);
		mutex_exit(&ibuf_pessimistic_insert_mutex);
		ibuf_size_update(root, &mtr);
		mutex_exit(&ibuf_mutex);
		ibuf->empty = (page_get_n_recs(root) == 0);

		block = btr_cur_get_block(cursor);
		ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID);
	}

	if (err == DB_SUCCESS && op != IBUF_OP_DELETE) {
		/* Update the page max trx id field */
		page_update_max_trx_id(block, NULL,
				       thr_get_trx(thr)->id, &mtr);
	}

func_exit:
#ifdef UNIV_IBUF_COUNT_DEBUG
	if (err == DB_SUCCESS) {
		fprintf(stderr,
			"Incrementing ibuf count of space %lu page %lu\n"
			"from %lu by 1\n", space, page_no,
			ibuf_count_get(space, page_no));

		ibuf_count_set(space, page_no,
			       ibuf_count_get(space, page_no) + 1);
	}
#endif

	mtr_commit(&mtr);
	btr_pcur_close(&pcur);
	ibuf_exit();

	mem_heap_free(heap);

	if (err == DB_SUCCESS && mode == BTR_MODIFY_TREE) {
		ibuf_contract_after_insert(entry_size);
	}

	if (do_merge) {
#ifdef UNIV_IBUF_DEBUG
		ut_a(n_stored <= IBUF_MAX_N_PAGES_MERGED);
#endif
		buf_read_ibuf_merge_pages(FALSE, space_ids, space_versions,
					  page_nos, n_stored);
	}

	return(err);
}
3609 
3610 /*********************************************************************/
3615 UNIV_INTERN
3616 ibool
3617 ibuf_insert(
3618 /*========*/
3619  ibuf_op_t op,
3620  const dtuple_t* entry,
3621  dict_index_t* index,
3622  ulint space,
3623  ulint zip_size,
3624  ulint page_no,
3625  que_thr_t* thr)
3626 {
3627  ulint err;
3628  ulint entry_size;
3629  ibool no_counter;
3630  /* Read the settable global variable ibuf_use only once in
3631  this function, so that we will have a consistent view of it. */
3632  ibuf_use_t use = ibuf_use;
3633 
3635  ut_ad(dtuple_check_typed(entry));
3636  ut_ad(ut_is_2pow(zip_size));
3637 
3638  ut_a(!dict_index_is_clust(index));
3639 
3640  no_counter = use <= IBUF_USE_INSERT;
3641 
3642  switch (op) {
3643  case IBUF_OP_INSERT:
3644  switch (use) {
3645  case IBUF_USE_NONE:
3646  case IBUF_USE_DELETE:
3647  case IBUF_USE_DELETE_MARK:
3648  return(FALSE);
3649  case IBUF_USE_INSERT:
3650  case IBUF_USE_INSERT_DELETE_MARK:
3651  case IBUF_USE_ALL:
3652  goto check_watch;
3653  case IBUF_USE_COUNT:
3654  break;
3655  }
3656  break;
3657  case IBUF_OP_DELETE_MARK:
3658  switch (use) {
3659  case IBUF_USE_NONE:
3660  case IBUF_USE_INSERT:
3661  return(FALSE);
3662  case IBUF_USE_DELETE_MARK:
3663  case IBUF_USE_DELETE:
3664  case IBUF_USE_INSERT_DELETE_MARK:
3665  case IBUF_USE_ALL:
3666  ut_ad(!no_counter);
3667  goto check_watch;
3668  case IBUF_USE_COUNT:
3669  break;
3670  }
3671  break;
3672  case IBUF_OP_DELETE:
3673  switch (use) {
3674  case IBUF_USE_NONE:
3675  case IBUF_USE_INSERT:
3676  case IBUF_USE_INSERT_DELETE_MARK:
3677  return(FALSE);
3678  case IBUF_USE_DELETE_MARK:
3679  case IBUF_USE_DELETE:
3680  case IBUF_USE_ALL:
3681  ut_ad(!no_counter);
3682  goto skip_watch;
3683  case IBUF_USE_COUNT:
3684  break;
3685  }
3686  break;
3687  case IBUF_OP_COUNT:
3688  break;
3689  }
3690 
3691  /* unknown op or use */
3692  ut_error;
3693 
3694 check_watch:
3695  /* If a thread attempts to buffer an insert on a page while a
3696  purge is in progress on the same page, the purge must not be
3697  buffered, because it could remove a record that was
3698  re-inserted later. For simplicity, we block the buffering of
3699  all operations on a page that has a purge pending.
3700 
3701  We do not check this in the IBUF_OP_DELETE case, because that
3702  would always trigger the buffer pool watch during purge and
3703  thus prevent the buffering of delete operations. We assume
3704  that the issuer of IBUF_OP_DELETE has called
3705  buf_pool_watch_set(space, page_no). */
3706 
3707  {
3708  buf_page_t* bpage;
3709  ulint fold = buf_page_address_fold(space, page_no);
3710  buf_pool_t* buf_pool = buf_pool_get(space, page_no);
3711 
3712  buf_pool_mutex_enter(buf_pool);
3713  bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
3714  buf_pool_mutex_exit(buf_pool);
3715 
3716  if (UNIV_LIKELY_NULL(bpage)) {
3717  /* A buffer pool watch has been set or the
3718  page has been read into the buffer pool.
3719  Do not buffer the request. If a purge operation
3720  is being buffered, have this request executed
3721  directly on the page in the buffer pool after the
3722  buffered entries for this page have been merged. */
3723  return(FALSE);
3724  }
3725  }
3726 
3727 skip_watch:
3728  entry_size = rec_get_converted_size(index, entry, 0);
3729 
3730  if (entry_size
3732  / 2) {
3733 
3734  return(FALSE);
3735  }
3736 
3737  err = ibuf_insert_low(BTR_MODIFY_PREV, op, no_counter,
3738  entry, entry_size,
3739  index, space, zip_size, page_no, thr);
3740  if (err == DB_FAIL) {
3741  err = ibuf_insert_low(BTR_MODIFY_TREE, op, no_counter,
3742  entry, entry_size,
3743  index, space, zip_size, page_no, thr);
3744  }
3745 
3746  if (err == DB_SUCCESS) {
3747 #ifdef UNIV_IBUF_DEBUG
3748  /* fprintf(stderr, "Ibuf insert for page no %lu of index %s\n",
3749  page_no, index->name); */
3750 #endif
3751  return(TRUE);
3752 
3753  } else {
3754  ut_a(err == DB_STRONG_FAIL);
3755 
3756  return(FALSE);
3757  }
3758 }
3759 
3760 /********************************************************************/
3763 static
3764 void
3765 ibuf_insert_to_index_page_low(
3766 /*==========================*/
3767  const dtuple_t* entry,
3768  buf_block_t* block,
3770  dict_index_t* index,
3771  mtr_t* mtr,
3772  page_cur_t* page_cur)
3774 {
3775  const page_t* page;
3776  ulint space;
3777  ulint page_no;
3778  ulint zip_size;
3779  const page_t* bitmap_page;
3780  ulint old_bits;
3781 
3782  if (UNIV_LIKELY
3783  (page_cur_tuple_insert(page_cur, entry, index, 0, mtr) != NULL)) {
3784  return;
3785  }
3786 
3787  /* If the record did not fit, reorganize */
3788 
3789  btr_page_reorganize(block, index, mtr);
3790  page_cur_search(block, index, entry, PAGE_CUR_LE, page_cur);
3791 
3792  /* This time the record must fit */
3793 
3794  if (UNIV_LIKELY
3795  (page_cur_tuple_insert(page_cur, entry, index, 0, mtr) != NULL)) {
3796  return;
3797  }
3798 
3799  page = buf_block_get_frame(block);
3800 
3801  ut_print_timestamp(stderr);
3802 
3803  fprintf(stderr,
3804  " InnoDB: Error: Insert buffer insert fails;"
3805  " page free %lu, dtuple size %lu\n",
3806  (ulong) page_get_max_insert_size(page, 1),
3807  (ulong) rec_get_converted_size(index, entry, 0));
3808  fputs("InnoDB: Cannot insert index record ", stderr);
3809  dtuple_print(stderr, entry);
3810  fputs("\nInnoDB: The table where this index record belongs\n"
3811  "InnoDB: is now probably corrupt. Please run CHECK TABLE on\n"
3812  "InnoDB: that table.\n", stderr);
3813 
3814  space = page_get_space_id(page);
3815  zip_size = buf_block_get_zip_size(block);
3816  page_no = page_get_page_no(page);
3817 
3818  bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
3819  old_bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
3820  IBUF_BITMAP_FREE, mtr);
3821 
3822  fprintf(stderr,
3823  "InnoDB: space %lu, page %lu, zip_size %lu, bitmap bits %lu\n",
3824  (ulong) space, (ulong) page_no,
3825  (ulong) zip_size, (ulong) old_bits);
3826 
3827  fputs("InnoDB: Submit a detailed bug report"
3828  " to http://bugs.mysql.com\n", stderr);
3829 }
3830 
3831 /************************************************************************
3832 During merge, inserts to an index page a secondary index entry extracted
3833 from the insert buffer. */
3834 static
3835 void
3836 ibuf_insert_to_index_page(
3837 /*======================*/
3838  const dtuple_t* entry,
3839  buf_block_t* block,
3841  dict_index_t* index,
3842  mtr_t* mtr)
3843 {
3844  page_cur_t page_cur;
3845  ulint low_match;
3846  page_t* page = buf_block_get_frame(block);
3847  rec_t* rec;
3848 
3849  ut_ad(ibuf_inside());
3850  ut_ad(dtuple_check_typed(entry));
3851  ut_ad(!buf_block_align(page)->is_hashed);
3852 
3853  if (UNIV_UNLIKELY(dict_table_is_comp(index->table)
3854  != (ibool)!!page_is_comp(page))) {
3855  fputs("InnoDB: Trying to insert a record from"
3856  " the insert buffer to an index page\n"
3857  "InnoDB: but the 'compact' flag does not match!\n",
3858  stderr);
3859  goto dump;
3860  }
3861 
3862  rec = page_rec_get_next(page_get_infimum_rec(page));
3863 
3864  if (page_rec_is_supremum(rec)) {
3865  fputs("InnoDB: Trying to insert a record from"
3866  " the insert buffer to an index page\n"
3867  "InnoDB: but the index page is empty!\n",
3868  stderr);
3869  goto dump;
3870  }
3871 
3872  if (UNIV_UNLIKELY(rec_get_n_fields(rec, index)
3873  != dtuple_get_n_fields(entry))) {
3874  fputs("InnoDB: Trying to insert a record from"
3875  " the insert buffer to an index page\n"
3876  "InnoDB: but the number of fields does not match!\n",
3877  stderr);
3878 dump:
3879  buf_page_print(page, 0);
3880 
3881  dtuple_print(stderr, entry);
3882 
3883  fputs("InnoDB: The table where where"
3884  " this index record belongs\n"
3885  "InnoDB: is now probably corrupt."
3886  " Please run CHECK TABLE on\n"
3887  "InnoDB: your tables.\n"
3888  "InnoDB: Submit a detailed bug report to"
3889  " http://bugs.mysql.com!\n", stderr);
3890 
3891  return;
3892  }
3893 
3894  low_match = page_cur_search(block, index, entry,
3895  PAGE_CUR_LE, &page_cur);
3896 
3897  if (UNIV_UNLIKELY(low_match == dtuple_get_n_fields(entry))) {
3898  mem_heap_t* heap;
3899  upd_t* update;
3900  ulint* offsets;
3901  page_zip_des_t* page_zip;
3902 
3903  rec = page_cur_get_rec(&page_cur);
3904 
3905  /* This is based on
3906  row_ins_sec_index_entry_by_modify(BTR_MODIFY_LEAF). */
3908 
3909  heap = mem_heap_create(1024);
3910 
3911  offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED,
3912  &heap);
3914  index, entry, rec, NULL, heap);
3915 
3916  page_zip = buf_block_get_page_zip(block);
3917 
3918  if (update->n_fields == 0) {
3919  /* The records only differ in the delete-mark.
3920  Clear the delete-mark, like we did before
3921  Bug #56680 was fixed. */
3922  btr_cur_set_deleted_flag_for_ibuf(
3923  rec, page_zip, FALSE, mtr);
3924 updated_in_place:
3925  mem_heap_free(heap);
3926  return;
3927  }
3928 
3929  /* Copy the info bits. Clear the delete-mark. */
3930  update->info_bits = rec_get_info_bits(rec, page_is_comp(page));
3931  update->info_bits &= ~REC_INFO_DELETED_FLAG;
3932 
3933  /* We cannot invoke btr_cur_optimistic_update() here,
3934  because we do not have a btr_cur_t or que_thr_t,
3935  as the insert buffer merge occurs at a very low level. */
3936  if (!row_upd_changes_field_size_or_external(index, offsets,
3937  update)
3938  && (!page_zip || btr_cur_update_alloc_zip(
3939  page_zip, block, index,
3940  rec_offs_size(offsets), FALSE, mtr))) {
3941  /* This is the easy case. Do something similar
3942  to btr_cur_update_in_place(). */
3943  row_upd_rec_in_place(rec, index, offsets,
3944  update, page_zip);
3945  goto updated_in_place;
3946  }
3947 
3948  /* A collation may identify values that differ in
3949  storage length.
3950  Some examples (1 or 2 bytes):
3951  utf8_turkish_ci: I = U+0131 LATIN SMALL LETTER DOTLESS I
3952  utf8_general_ci: S = U+00DF LATIN SMALL LETTER SHARP S
3953  utf8_general_ci: A = U+00E4 LATIN SMALL LETTER A WITH DIAERESIS
3954 
3955  latin1_german2_ci: SS = U+00DF LATIN SMALL LETTER SHARP S
3956 
3957  Examples of a character (3-byte UTF-8 sequence)
3958  identified with 2 or 4 characters (1-byte UTF-8 sequences):
3959 
3960  utf8_unicode_ci: 'II' = U+2171 SMALL ROMAN NUMERAL TWO
3961  utf8_unicode_ci: '(10)' = U+247D PARENTHESIZED NUMBER TEN
3962  */
3963 
3964  /* Delete the different-length record, and insert the
3965  buffered one. */
3966 
3967  lock_rec_store_on_page_infimum(block, rec);
3968  page_cur_delete_rec(&page_cur, index, offsets, mtr);
3969  page_cur_move_to_prev(&page_cur);
3970  mem_heap_free(heap);
3971 
3972  ibuf_insert_to_index_page_low(entry, block, index, mtr,
3973  &page_cur);
3974  lock_rec_restore_from_page_infimum(block, rec, block);
3975  } else {
3976  ibuf_insert_to_index_page_low(entry, block, index, mtr,
3977  &page_cur);
3978  }
3979 }
3980 
3981 /****************************************************************/
3984 static
3985 void
3986 ibuf_set_del_mark(
3987 /*==============*/
3988  const dtuple_t* entry,
3989  buf_block_t* block,
3990  const dict_index_t* index,
3991  mtr_t* mtr)
3992 {
3993  page_cur_t page_cur;
3994  ulint low_match;
3995 
3996  ut_ad(ibuf_inside());
3997  ut_ad(dtuple_check_typed(entry));
3998 
3999  low_match = page_cur_search(
4000  block, index, entry, PAGE_CUR_LE, &page_cur);
4001 
4002  if (low_match == dtuple_get_n_fields(entry)) {
4003  rec_t* rec;
4004  page_zip_des_t* page_zip;
4005 
4006  rec = page_cur_get_rec(&page_cur);
4007  page_zip = page_cur_get_page_zip(&page_cur);
4008 
4009  /* Delete mark the old index record. According to a
4010  comment in row_upd_sec_index_entry(), it can already
4011  have been delete marked if a lock wait occurred in
4012  row_ins_index_entry() in a previous invocation of
4013  row_upd_sec_index_entry(). */
4014 
4015  if (UNIV_LIKELY
4017  rec, dict_table_is_comp(index->table)))) {
4018  btr_cur_set_deleted_flag_for_ibuf(rec, page_zip,
4019  TRUE, mtr);
4020  }
4021  } else {
4022  ut_print_timestamp(stderr);
4023  fputs(" InnoDB: unable to find a record to delete-mark\n",
4024  stderr);
4025  fputs("InnoDB: tuple ", stderr);
4026  dtuple_print(stderr, entry);
4027  fputs("\n"
4028  "InnoDB: record ", stderr);
4029  rec_print(stderr, page_cur_get_rec(&page_cur), index);
4030  putc('\n', stderr);
4031  fputs("\n"
4032  "InnoDB: Submit a detailed bug report"
4033  " to http://bugs.mysql.com\n", stderr);
4034  ut_ad(0);
4035  }
4036 }
4037 
4038 /****************************************************************/
4040 static
4041 void
4042 ibuf_delete(
4043 /*========*/
4044  const dtuple_t* entry,
4045  buf_block_t* block,
4046  dict_index_t* index,
4047  mtr_t* mtr)
4049 {
4050  page_cur_t page_cur;
4051  ulint low_match;
4052 
4053  ut_ad(ibuf_inside());
4054  ut_ad(dtuple_check_typed(entry));
4055 
4056  low_match = page_cur_search(
4057  block, index, entry, PAGE_CUR_LE, &page_cur);
4058 
4059  if (low_match == dtuple_get_n_fields(entry)) {
4060  page_zip_des_t* page_zip= buf_block_get_page_zip(block);
4061  page_t* page = buf_block_get_frame(block);
4062  rec_t* rec = page_cur_get_rec(&page_cur);
4063 
4064  /* TODO: the below should probably be a separate function,
4065  it's a bastardized version of btr_cur_optimistic_delete. */
4066 
4067  ulint offsets_[REC_OFFS_NORMAL_SIZE];
4068  ulint* offsets = offsets_;
4069  mem_heap_t* heap = NULL;
4070  ulint max_ins_size;
4071 
4072  rec_offs_init(offsets_);
4073 
4074  offsets = rec_get_offsets(
4075  rec, index, offsets, ULINT_UNDEFINED, &heap);
4076 
4077  /* Refuse to delete the last record. */
4078  ut_a(page_get_n_recs(page) > 1);
4079 
4080  /* The record should have been marked for deletion. */
4081  ut_ad(REC_INFO_DELETED_FLAG
4082  & rec_get_info_bits(rec, page_is_comp(page)));
4083 
4084  lock_update_delete(block, rec);
4085 
4086  if (!page_zip) {
4087  max_ins_size
4089  page, 1);
4090  }
4091 #ifdef UNIV_ZIP_DEBUG
4092  ut_a(!page_zip || page_zip_validate(page_zip, page));
4093 #endif /* UNIV_ZIP_DEBUG */
4094  page_cur_delete_rec(&page_cur, index, offsets, mtr);
4095 #ifdef UNIV_ZIP_DEBUG
4096  ut_a(!page_zip || page_zip_validate(page_zip, page));
4097 #endif /* UNIV_ZIP_DEBUG */
4098 
4099  if (page_zip) {
4100  ibuf_update_free_bits_zip(block, mtr);
4101  } else {
4102  ibuf_update_free_bits_low(block, max_ins_size, mtr);
4103  }
4104 
4105  if (UNIV_LIKELY_NULL(heap)) {
4106  mem_heap_free(heap);
4107  }
4108  } else {
4109  /* The record must have been purged already. */
4110  }
4111 }
4112 
4113 /*********************************************************************/
4116 static __attribute__((nonnull))
4117 ibool
4118 ibuf_restore_pos(
4119 /*=============*/
4120  ulint space,
4121  ulint page_no,
4123  const dtuple_t* search_tuple,
4125  ulint mode,
4126  btr_pcur_t* pcur,
4128  mtr_t* mtr)
4129 {
4130  ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_MODIFY_TREE);
4131 
4132  if (btr_pcur_restore_position(mode, pcur, mtr)) {
4133 
4134  return(TRUE);
4135  }
4136 
4137  if (fil_space_get_flags(space) == ULINT_UNDEFINED) {
4138  /* The tablespace has been dropped. It is possible
4139  that another thread has deleted the insert buffer
4140  entry. Do not complain. */
4141  btr_pcur_commit_specify_mtr(pcur, mtr);
4142  } else {
4143  fprintf(stderr,
4144  "InnoDB: ERROR: Submit the output to"
4145  " http://bugs.mysql.com\n"
4146  "InnoDB: ibuf cursor restoration fails!\n"
4147  "InnoDB: ibuf record inserted to page %lu:%lu\n",
4148  (ulong) space, (ulong) page_no);
4149  fflush(stderr);
4150 
4151  rec_print_old(stderr, btr_pcur_get_rec(pcur));
4152  rec_print_old(stderr, pcur->old_rec);
4153  dtuple_print(stderr, search_tuple);
4154 
4155  rec_print_old(stderr,
4157  fflush(stderr);
4158 
4159  btr_pcur_commit_specify_mtr(pcur, mtr);
4160 
4161  fputs("InnoDB: Validating insert buffer tree:\n", stderr);
4162  if (!btr_validate_index(ibuf->index, NULL)) {
4163  ut_error;
4164  }
4165 
4166  fprintf(stderr, "InnoDB: ibuf tree ok\n");
4167  fflush(stderr);
4168  }
4169 
4170  return(FALSE);
4171 }
4172 
4173 /*********************************************************************/
4178 static
4179 ibool
4180 ibuf_delete_rec(
4181 /*============*/
4182  ulint space,
4183  ulint page_no,
4185  btr_pcur_t* pcur,
4187  const dtuple_t* search_tuple,
4189  mtr_t* mtr)
4190 {
4191  ibool success;
4192  page_t* root;
4193  ulint err;
4194 
4195  ut_ad(ibuf_inside());
4197  ut_ad(ibuf_rec_get_page_no(btr_pcur_get_rec(pcur)) == page_no);
4198  ut_ad(ibuf_rec_get_space(btr_pcur_get_rec(pcur)) == space);
4199 
4200  success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr);
4201 
4202  if (success) {
4203  if (UNIV_UNLIKELY(!page_get_n_recs(btr_pcur_get_page(pcur)))) {
4204  /* If a B-tree page is empty, it must be the root page
4205  and the whole B-tree must be empty. InnoDB does not
4206  allow empty B-tree pages other than the root. */
4207  root = btr_pcur_get_page(pcur);
4208 
4209  ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
4210  ut_ad(page_get_page_no(root)
4211  == FSP_IBUF_TREE_ROOT_PAGE_NO);
4212 
4213  /* ibuf->empty is protected by the root page latch.
4214  Before the deletion, it had to be FALSE. */
4215  ut_ad(!ibuf->empty);
4216  ibuf->empty = TRUE;
4217  }
4218 
4219 #ifdef UNIV_IBUF_COUNT_DEBUG
4220  fprintf(stderr,
4221  "Decrementing ibuf count of space %lu page %lu\n"
4222  "from %lu by 1\n", space, page_no,
4223  ibuf_count_get(space, page_no));
4224  ibuf_count_set(space, page_no,
4225  ibuf_count_get(space, page_no) - 1);
4226 #endif
4227  return(FALSE);
4228  }
4229 
4231  ut_ad(ibuf_rec_get_page_no(btr_pcur_get_rec(pcur)) == page_no);
4232  ut_ad(ibuf_rec_get_space(btr_pcur_get_rec(pcur)) == space);
4233 
4234  /* We have to resort to a pessimistic delete from ibuf */
4235  btr_pcur_store_position(pcur, mtr);
4236 
4237  btr_pcur_commit_specify_mtr(pcur, mtr);
4238 
4239  mutex_enter(&ibuf_mutex);
4240 
4241  mtr_start(mtr);
4242 
4243  if (!ibuf_restore_pos(space, page_no, search_tuple,
4244  BTR_MODIFY_TREE, pcur, mtr)) {
4245 
4246  mutex_exit(&ibuf_mutex);
4247  goto func_exit;
4248  }
4249 
4250  root = ibuf_tree_root_get(mtr);
4251 
4252  btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur),
4253  RB_NONE, mtr);
4254  ut_a(err == DB_SUCCESS);
4255 
4256 #ifdef UNIV_IBUF_COUNT_DEBUG
4257  ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1);
4258 #endif
4259  ibuf_size_update(root, mtr);
4260  mutex_exit(&ibuf_mutex);
4261 
4262  ibuf->empty = (page_get_n_recs(root) == 0);
4263  btr_pcur_commit_specify_mtr(pcur, mtr);
4264 
4265 func_exit:
4266  btr_pcur_close(pcur);
4267 
4268  return(TRUE);
4269 }
4270 
4271 /*********************************************************************/
4278 UNIV_INTERN
4279 void
4281 /*==========================*/
4282  buf_block_t* block,
4285  ulint space,
4286  ulint page_no,
4287  ulint zip_size,
4289  ibool update_ibuf_bitmap)
4294 {
4295  mem_heap_t* heap;
4296  btr_pcur_t pcur;
4297  dtuple_t* search_tuple;
4298 #ifdef UNIV_IBUF_DEBUG
4299  ulint volume = 0;
4300 #endif
4301  page_zip_des_t* page_zip = NULL;
4302  ibool tablespace_being_deleted = FALSE;
4303  ibool corruption_noticed = FALSE;
4304  mtr_t mtr;
4305 
4306  /* Counts for merged & discarded operations. */
4307  ulint mops[IBUF_OP_COUNT];
4308  ulint dops[IBUF_OP_COUNT];
4309 
4310  ut_ad(!block || buf_block_get_space(block) == space);
4311  ut_ad(!block || buf_block_get_page_no(block) == page_no);
4312  ut_ad(!block || buf_block_get_zip_size(block) == zip_size);
4313 
4314  if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE
4315  || trx_sys_hdr_page(space, page_no)) {
4316  return;
4317  }
4318 
4319  /* We cannot refer to zip_size in the following, because
4320  zip_size is passed as ULINT_UNDEFINED (it is unknown) when
4321  buf_read_ibuf_merge_pages() is merging (discarding) changes
4322  for a dropped tablespace. When block != NULL or
4323  update_ibuf_bitmap is specified, the zip_size must be known.
4324  That is why we will repeat the check below, with zip_size in
4325  place of 0. Passing zip_size as 0 assumes that the
4326  uncompressed page size always is a power-of-2 multiple of the
4327  compressed page size. */
4328 
4329  if (ibuf_fixed_addr_page(space, 0, page_no)
4330  || fsp_descr_page(0, page_no)) {
4331  return;
4332  }
4333 
4334  if (UNIV_LIKELY(update_ibuf_bitmap)) {
4335  ut_a(ut_is_2pow(zip_size));
4336 
4337  if (ibuf_fixed_addr_page(space, zip_size, page_no)
4338  || fsp_descr_page(zip_size, page_no)) {
4339  return;
4340  }
4341 
4342  /* If the following returns FALSE, we get the counter
4343  incremented, and must decrement it when we leave this
4344  function. When the counter is > 0, that prevents tablespace
4345  from being dropped. */
4346 
4347  tablespace_being_deleted = fil_inc_pending_ibuf_merges(space);
4348 
4349  if (UNIV_UNLIKELY(tablespace_being_deleted)) {
4350  /* Do not try to read the bitmap page from space;
4351  just delete the ibuf records for the page */
4352 
4353  block = NULL;
4354  update_ibuf_bitmap = FALSE;
4355  } else {
4356  page_t* bitmap_page;
4357 
4358  mtr_start(&mtr);
4359 
4360  bitmap_page = ibuf_bitmap_get_map_page(
4361  space, page_no, zip_size, &mtr);
4362 
4363  if (!ibuf_bitmap_page_get_bits(bitmap_page, page_no,
4364  zip_size,
4365  IBUF_BITMAP_BUFFERED,
4366  &mtr)) {
4367  /* No inserts buffered for this page */
4368  mtr_commit(&mtr);
4369 
4370  if (!tablespace_being_deleted) {
4371  fil_decr_pending_ibuf_merges(space);
4372  }
4373 
4374  return;
4375  }
4376  mtr_commit(&mtr);
4377  }
4378  } else if (block
4379  && (ibuf_fixed_addr_page(space, zip_size, page_no)
4380  || fsp_descr_page(zip_size, page_no))) {
4381 
4382  return;
4383  }
4384 
4385  ibuf_enter();
4386 
4387  heap = mem_heap_create(512);
4388 
4389  if (!trx_sys_multiple_tablespace_format) {
4390  ut_a(trx_doublewrite_must_reset_space_ids);
4391  search_tuple = ibuf_search_tuple_build(space, page_no, heap);
4392  } else {
4393  search_tuple = ibuf_new_search_tuple_build(space, page_no,
4394  heap);
4395  }
4396 
4397  if (block) {
4398  /* Move the ownership of the x-latch on the page to this OS
4399  thread, so that we can acquire a second x-latch on it. This
4400  is needed for the insert operations to the index page to pass
4401  the debug checks. */
4402 
4404  page_zip = buf_block_get_page_zip(block);
4405 
4406  if (UNIV_UNLIKELY(fil_page_get_type(block->frame)
4407  != FIL_PAGE_INDEX)
4408  || UNIV_UNLIKELY(!page_is_leaf(block->frame))) {
4409 
4410  page_t* bitmap_page;
4411 
4412  corruption_noticed = TRUE;
4413 
4414  ut_print_timestamp(stderr);
4415 
4416  mtr_start(&mtr);
4417 
4418  fputs(" InnoDB: Dump of the ibuf bitmap page:\n",
4419  stderr);
4420 
4421  bitmap_page = ibuf_bitmap_get_map_page(space, page_no,
4422  zip_size, &mtr);
4423  buf_page_print(bitmap_page, 0);
4424 
4425  mtr_commit(&mtr);
4426 
4427  fputs("\nInnoDB: Dump of the page:\n", stderr);
4428 
4429  buf_page_print(block->frame, 0);
4430 
4431  fprintf(stderr,
4432  "InnoDB: Error: corruption in the tablespace."
4433  " Bitmap shows insert\n"
4434  "InnoDB: buffer records to page n:o %lu"
4435  " though the page\n"
4436  "InnoDB: type is %lu, which is"
4437  " not an index leaf page!\n"
4438  "InnoDB: We try to resolve the problem"
4439  " by skipping the insert buffer\n"
4440  "InnoDB: merge for this page."
4441  " Please run CHECK TABLE on your tables\n"
4442  "InnoDB: to determine if they are corrupt"
4443  " after this.\n\n"
4444  "InnoDB: Please submit a detailed bug report"
4445  " to http://bugs.mysql.com\n\n",
4446  (ulong) page_no,
4447  (ulong)
4448  fil_page_get_type(block->frame));
4449  }
4450  }
4451 
4452  memset(mops, 0, sizeof(mops));
4453  memset(dops, 0, sizeof(dops));
4454 
4455 loop:
4456  mtr_start(&mtr);
4457 
4458  if (block) {
4459  ibool success;
4460 
4461  success = buf_page_get_known_nowait(
4462  RW_X_LATCH, block,
4463  BUF_KEEP_OLD, __FILE__, __LINE__, &mtr);
4464 
4465  ut_a(success);
4466 
4467  buf_block_dbg_add_level(block, SYNC_TREE_NODE);
4468  }
4469 
4470  /* Position pcur in the insert buffer at the first entry for this
4471  index page */
4472  btr_pcur_open_on_user_rec(
4473  ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
4474  &pcur, &mtr);
4475 
4476  if (!btr_pcur_is_on_user_rec(&pcur)) {
4477  ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
4478 
4479  goto reset_bit;
4480  }
4481 
4482  for (;;) {
4483  rec_t* rec;
4484 
4486 
4487  rec = btr_pcur_get_rec(&pcur);
4488 
4489  /* Check if the entry is for this index page */
4490  if (ibuf_rec_get_page_no(rec) != page_no
4491  || ibuf_rec_get_space(rec) != space) {
4492 
4493  if (block) {
4495  block->frame, page_zip, &mtr);
4496  }
4497 
4498  goto reset_bit;
4499  }
4500 
4501  if (UNIV_UNLIKELY(corruption_noticed)) {
4502  fputs("InnoDB: Discarding record\n ", stderr);
4503  rec_print_old(stderr, rec);
4504  fputs("\nInnoDB: from the insert buffer!\n\n", stderr);
4505  } else if (block) {
4506  /* Now we have at pcur a record which should be
4507  applied on the index page; NOTE that the call below
4508  copies pointers to fields in rec, and we must
4509  keep the latch to the rec page until the
4510  insertion is finished! */
4511  dtuple_t* entry;
4512  trx_id_t max_trx_id;
4513  dict_index_t* dummy_index;
4514  ibuf_op_t op = ibuf_rec_get_op_type(rec);
4515 
4516  max_trx_id = page_get_max_trx_id(page_align(rec));
4517  page_update_max_trx_id(block, page_zip, max_trx_id,
4518  &mtr);
4519 
4520  ut_ad(page_validate(page_align(rec), ibuf->index));
4521 
4522  entry = ibuf_build_entry_from_ibuf_rec(
4523  rec, heap, &dummy_index);
4524 
4525  ut_ad(page_validate(block->frame, dummy_index));
4526 
4527  switch (op) {
4528  ibool success;
4529  case IBUF_OP_INSERT:
4530 #ifdef UNIV_IBUF_DEBUG
4531  volume += rec_get_converted_size(
4532  dummy_index, entry, 0);
4533 
4534  volume += page_dir_calc_reserved_space(1);
4535 
4536  ut_a(volume <= 4 * UNIV_PAGE_SIZE
4537  / IBUF_PAGE_SIZE_PER_FREE_SPACE);
4538 #endif
4539  ibuf_insert_to_index_page(
4540  entry, block, dummy_index, &mtr);
4541  break;
4542 
4543  case IBUF_OP_DELETE_MARK:
4544  ibuf_set_del_mark(
4545  entry, block, dummy_index, &mtr);
4546  break;
4547 
4548  case IBUF_OP_DELETE:
4549  ibuf_delete(entry, block, dummy_index, &mtr);
4550  /* Because ibuf_delete() will latch an
4551  insert buffer bitmap page, commit mtr
4552  before latching any further pages.
4553  Store and restore the cursor position. */
4554  ut_ad(rec == btr_pcur_get_rec(&pcur));
4556  ut_ad(ibuf_rec_get_page_no(rec) == page_no);
4557  ut_ad(ibuf_rec_get_space(rec) == space);
4558 
4559  btr_pcur_store_position(&pcur, &mtr);
4560  btr_pcur_commit_specify_mtr(&pcur, &mtr);
4561 
4562  mtr_start(&mtr);
4563 
4564  success = buf_page_get_known_nowait(
4565  RW_X_LATCH, block,
4566  BUF_KEEP_OLD,
4567  __FILE__, __LINE__, &mtr);
4568  ut_a(success);
4569 
4570  buf_block_dbg_add_level(block, SYNC_TREE_NODE);
4571 
4572  if (!ibuf_restore_pos(space, page_no,
4573  search_tuple,
4575  &pcur, &mtr)) {
4576 
4577  mtr_commit(&mtr);
4578  mops[op]++;
4579  ibuf_dummy_index_free(dummy_index);
4580  goto loop;
4581  }
4582 
4583  break;
4584  default:
4585  ut_error;
4586  }
4587 
4588  mops[op]++;
4589 
4590  ibuf_dummy_index_free(dummy_index);
4591  } else {
4592  dops[ibuf_rec_get_op_type(rec)]++;
4593  }
4594 
4595  /* Delete the record from ibuf */
4596  if (ibuf_delete_rec(space, page_no, &pcur, search_tuple,
4597  &mtr)) {
4598  /* Deletion was pessimistic and mtr was committed:
4599  we start from the beginning again */
4600 
4601  goto loop;
4602  } else if (btr_pcur_is_after_last_on_page(&pcur)) {
4603  mtr_commit(&mtr);
4604  btr_pcur_close(&pcur);
4605 
4606  goto loop;
4607  }
4608  }
4609 
4610 reset_bit:
4611  if (UNIV_LIKELY(update_ibuf_bitmap)) {
4612  page_t* bitmap_page;
4613 
4614  bitmap_page = ibuf_bitmap_get_map_page(
4615  space, page_no, zip_size, &mtr);
4616 
4617  ibuf_bitmap_page_set_bits(
4618  bitmap_page, page_no, zip_size,
4619  IBUF_BITMAP_BUFFERED, FALSE, &mtr);
4620 
4621  if (block) {
4622  ulint old_bits = ibuf_bitmap_page_get_bits(
4623  bitmap_page, page_no, zip_size,
4624  IBUF_BITMAP_FREE, &mtr);
4625 
4626  ulint new_bits = ibuf_index_page_calc_free(
4627  zip_size, block);
4628 
4629  if (old_bits != new_bits) {
4630  ibuf_bitmap_page_set_bits(
4631  bitmap_page, page_no, zip_size,
4632  IBUF_BITMAP_FREE, new_bits, &mtr);
4633  }
4634  }
4635  }
4636 
4637  mtr_commit(&mtr);
4638  btr_pcur_close(&pcur);
4639  mem_heap_free(heap);
4640 
4641 #ifdef HAVE_ATOMIC_BUILTINS
4642  os_atomic_increment_ulint(&ibuf->n_merges, 1);
4643  ibuf_add_ops(ibuf->n_merged_ops, mops);
4644  ibuf_add_ops(ibuf->n_discarded_ops, dops);
4645 #else /* HAVE_ATOMIC_BUILTINS */
4646  /* Protect our statistics keeping from race conditions */
4647  mutex_enter(&ibuf_mutex);
4648 
4649  ibuf->n_merges++;
4650  ibuf_add_ops(ibuf->n_merged_ops, mops);
4651  ibuf_add_ops(ibuf->n_discarded_ops, dops);
4652 
4653  mutex_exit(&ibuf_mutex);
4654 #endif /* HAVE_ATOMIC_BUILTINS */
4655 
4656  if (update_ibuf_bitmap && !tablespace_being_deleted) {
4657 
4658  fil_decr_pending_ibuf_merges(space);
4659  }
4660 
4661  ibuf_exit();
4662 
4663 #ifdef UNIV_IBUF_COUNT_DEBUG
4664  ut_a(ibuf_count_get(space, page_no) == 0);
4665 #endif
4666 }
4667 
4668 /*********************************************************************/
4673 UNIV_INTERN
4674 void
4676 /*============================*/
4677  ulint space)
4678 {
4679  mem_heap_t* heap;
4680  btr_pcur_t pcur;
4681  dtuple_t* search_tuple;
4682  rec_t* ibuf_rec;
4683  ulint page_no;
4684  ibool closed;
4685  mtr_t mtr;
4686 
4687  /* Counts for discarded operations. */
4688  ulint dops[IBUF_OP_COUNT];
4689 
4690  heap = mem_heap_create(512);
4691 
4692  /* Use page number 0 to build the search tuple so that we get the
4693  cursor positioned at the first entry for this space id */
4694 
4695  search_tuple = ibuf_new_search_tuple_build(space, 0, heap);
4696 
4697  memset(dops, 0, sizeof(dops));
4698 loop:
4699  ibuf_enter();
4700 
4701  mtr_start(&mtr);
4702 
4703  /* Position pcur in the insert buffer at the first entry for the
4704  space */
4705  btr_pcur_open_on_user_rec(
4706  ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
4707  &pcur, &mtr);
4708 
4709  if (!btr_pcur_is_on_user_rec(&pcur)) {
4710  ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
4711 
4712  goto leave_loop;
4713  }
4714 
4715  for (;;) {
4717 
4718  ibuf_rec = btr_pcur_get_rec(&pcur);
4719 
4720  /* Check if the entry is for this space */
4721  if (ibuf_rec_get_space(ibuf_rec) != space) {
4722 
4723  goto leave_loop;
4724  }
4725 
4726  page_no = ibuf_rec_get_page_no(ibuf_rec);
4727 
4728  dops[ibuf_rec_get_op_type(ibuf_rec)]++;
4729 
4730  /* Delete the record from ibuf */
4731  closed = ibuf_delete_rec(space, page_no, &pcur, search_tuple,
4732  &mtr);
4733  if (closed) {
4734  /* Deletion was pessimistic and mtr was committed:
4735  we start from the beginning again */
4736 
4737  ibuf_exit();
4738 
4739  goto loop;
4740  }
4741 
4742  if (btr_pcur_is_after_last_on_page(&pcur)) {
4743  mtr_commit(&mtr);
4744  btr_pcur_close(&pcur);
4745 
4746  ibuf_exit();
4747 
4748  goto loop;
4749  }
4750  }
4751 
4752 leave_loop:
4753  mtr_commit(&mtr);
4754  btr_pcur_close(&pcur);
4755 
4756 #ifdef HAVE_ATOMIC_BUILTINS
4757  ibuf_add_ops(ibuf->n_discarded_ops, dops);
4758 #else /* HAVE_ATOMIC_BUILTINS */
4759  /* Protect our statistics keeping from race conditions */
4760  mutex_enter(&ibuf_mutex);
4761  ibuf_add_ops(ibuf->n_discarded_ops, dops);
4762  mutex_exit(&ibuf_mutex);
4763 #endif /* HAVE_ATOMIC_BUILTINS */
4764 
4765  ibuf_exit();
4766 
4767  mem_heap_free(heap);
4768 }
4769 
4770 /******************************************************************/
4773 UNIV_INTERN
4774 ibool
4775 ibuf_is_empty(void)
4776 /*===============*/
4777 {
4778  ibool is_empty;
4779  const page_t* root;
4780  mtr_t mtr;
4781 
4782  ibuf_enter();
4783  mtr_start(&mtr);
4784 
4785  mutex_enter(&ibuf_mutex);
4786  root = ibuf_tree_root_get(&mtr);
4787  mutex_exit(&ibuf_mutex);
4788 
4789  is_empty = (page_get_n_recs(root) == 0);
4790  mtr_commit(&mtr);
4791  ibuf_exit();
4792 
4793  ut_a(is_empty == ibuf->empty);
4794 
4795  return(is_empty);
4796 }
4797 
4798 /******************************************************************/
4800 UNIV_INTERN
4801 void
4802 ibuf_print(
4803 /*=======*/
4804  FILE* file)
4805 {
4806 #ifdef UNIV_IBUF_COUNT_DEBUG
4807  ulint i;
4808  ulint j;
4809 #endif
4810 
4811  mutex_enter(&ibuf_mutex);
4812 
4813  fprintf(file,
4814  "Ibuf: size %lu, free list len %lu,"
4815  " seg size %lu, %lu merges\n",
4816  (ulong) ibuf->size,
4817  (ulong) ibuf->free_list_len,
4818  (ulong) ibuf->seg_size,
4819  (ulong) ibuf->n_merges);
4820 
4821  fputs("merged operations:\n ", file);
4822  ibuf_print_ops(ibuf->n_merged_ops, file);
4823 
4824  fputs("discarded operations:\n ", file);
4825  ibuf_print_ops(ibuf->n_discarded_ops, file);
4826 
4827 #ifdef UNIV_IBUF_COUNT_DEBUG
4828  for (i = 0; i < IBUF_COUNT_N_SPACES; i++) {
4829  for (j = 0; j < IBUF_COUNT_N_PAGES; j++) {
4830  ulint count = ibuf_count_get(i, j);
4831 
4832  if (count > 0) {
4833  fprintf(stderr,
4834  "Ibuf count for space/page %lu/%lu"
4835  " is %lu\n",
4836  (ulong) i, (ulong) j, (ulong) count);
4837  }
4838  }
4839  }
4840 #endif /* UNIV_IBUF_COUNT_DEBUG */
4841 
4842  mutex_exit(&ibuf_mutex);
4843 }
4844 #endif /* !UNIV_HOTBACKUP */