Drizzle public API documentation extract — mem0pool.cc
(The lowest-level memory management: a buddy allocator for the common memory pool.)
1 /*****************************************************************************
2 
3 Copyright (C) 1997, 2009, Innobase Oy. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
8 
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12 
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
15 St, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 *****************************************************************************/
18 
/********************************************************************//**
@file mem/mem0pool.c
The lowest-level memory management

Created 5/12/1997 Heikki Tuuri
*************************************************************/
26 #include "mem0pool.h"
27 #ifdef UNIV_NONINL
28 #include "mem0pool.ic"
29 #endif
30 
31 #include "srv0srv.h"
32 #include "sync0sync.h"
33 #include "ut0mem.h"
34 #include "ut0lst.h"
35 #include "ut0byte.h"
36 #include "mem0mem.h"
37 #include "srv0start.h"
38 
39 /* We would like to use also the buffer frames to allocate memory. This
40 would be desirable, because then the memory consumption of the database
41 would be fixed, and we might even lock the buffer pool to the main memory.
42 The problem here is that the buffer management routines can themselves call
43 memory allocation, while the buffer pool mutex is reserved.
44 
45 The main components of the memory consumption are:
46 
47 1. buffer pool,
48 2. parsed and optimized SQL statements,
49 3. data dictionary cache,
50 4. log buffer,
51 5. locks for each transaction,
52 6. hash table for the adaptive index,
53 7. state and buffers for each SQL query currently being executed,
54 8. session for each user, and
55 9. stack for each OS thread.
56 
57 Items 1 and 2 are managed by an LRU algorithm. Items 5 and 6 can potentially
58 consume very much memory. Items 7 and 8 should consume quite little memory,
59 and the OS should take care of item 9, which too should consume little memory.
60 
61 A solution to the memory management:
62 
63 1. the buffer pool size is set separately;
64 2. log buffer size is set separately;
65 3. the common pool size for all the other entries, except 8, is set separately.
66 
67 Problems: we may waste memory if the common pool is set too big. Another
68 problem is the locks, which may take very much space in big transactions.
69 Then the shared pool size should be set very big. We can allow locks to take
70 space from the buffer pool, but the SQL optimizer is then unaware of the
71 usable size of the buffer pool. We could also combine the objects in the
72 common pool and the buffers in the buffer pool into a single LRU list and
73 manage it uniformly, but this approach does not take into account the parsing
74 and other costs unique to SQL statements.
75 
76 The locks for a transaction can be seen as a part of the state of the
77 transaction. Hence, they should be stored in the common pool. We still
78 have the problem of a very big update transaction, for example, which
79 will set very many x-locks on rows, and the locks will consume a lot
80 of memory, say, half of the buffer pool size.
81 
82 Another problem is what to do if we are not able to malloc a requested
83 block of memory from the common pool. Then we can request memory from
84 the operating system. If it does not help, a system error results.
85 
86 Because 5 and 6 may potentially consume very much memory, we let them grow
87 into the buffer pool. We may let the locks of a transaction take frames
88 from the buffer pool, when the corresponding memory heap block has grown to
89 the size of a buffer frame. Similarly for the hash node cells of the locks,
90 and for the adaptive index. Thus, for each individual transaction, its locks
91 can occupy at most about the size of the buffer frame of memory in the common
92 pool, and after that its locks will grow into the buffer pool. */
93 
95 #define MEM_AREA_FREE 1
96 
98 #define MEM_AREA_MIN_SIZE (2 * MEM_AREA_EXTRA_SIZE)
99 
100 
104  byte* buf;
105  ulint size;
106  ulint reserved;
110  free_list[64];
113 };
114 
116 UNIV_INTERN mem_pool_t* mem_comm_pool = NULL;
117 
118 #ifdef UNIV_PFS_MUTEX
119 /* Key to register mutex in mem_pool_struct with performance schema */
120 UNIV_INTERN mysql_pfs_key_t mem_pool_mutex_key;
121 #endif /* UNIV_PFS_MUTEX */
122 
123 /* We use this counter to check that the mem pool mutex does not leak;
124 this is to track a strange assertion failure reported at
125 mysql@lists.mysql.com */
126 
127 UNIV_INTERN ulint mem_n_threads_inside = 0;
128 
129 /********************************************************************/
133 UNIV_INLINE
134 void
135 mem_pool_mutex_enter(
136 /*=================*/
137  mem_pool_t* pool)
138 {
140  mutex_enter(&(pool->mutex));
141  }
142 }
143 
144 /********************************************************************/
148 UNIV_INLINE
149 void
150 mem_pool_mutex_exit(
151 /*================*/
152  mem_pool_t* pool)
153 {
155  mutex_exit(&(pool->mutex));
156  }
157 }
158 
159 /********************************************************************/
162 UNIV_INLINE
163 ulint
164 mem_area_get_size(
165 /*==============*/
166  mem_area_t* area)
167 {
168  return(area->size_and_free & ~MEM_AREA_FREE);
169 }
170 
171 /********************************************************************/
173 UNIV_INLINE
174 void
175 mem_area_set_size(
176 /*==============*/
177  mem_area_t* area,
178  ulint size)
179 {
180  area->size_and_free = (area->size_and_free & MEM_AREA_FREE)
181  | size;
182 }
183 
184 /********************************************************************/
187 UNIV_INLINE
188 ibool
189 mem_area_get_free(
190 /*==============*/
191  mem_area_t* area)
192 {
193 #if TRUE != MEM_AREA_FREE
194 # error "TRUE != MEM_AREA_FREE"
195 #endif
196  return(area->size_and_free & MEM_AREA_FREE);
197 }
198 
199 /********************************************************************/
201 UNIV_INLINE
202 void
203 mem_area_set_free(
204 /*==============*/
205  mem_area_t* area,
206  ibool free)
207 {
208 #if TRUE != MEM_AREA_FREE
209 # error "TRUE != MEM_AREA_FREE"
210 #endif
211  area->size_and_free = (area->size_and_free & ~MEM_AREA_FREE)
212  | free;
213 }
214 
215 /********************************************************************/
218 UNIV_INTERN
219 mem_pool_t*
221 /*============*/
222  ulint size)
223 {
224  mem_pool_t* pool;
225  mem_area_t* area;
226  ulint i;
227  ulint used;
228 
229  pool = static_cast<mem_pool_t *>(ut_malloc(sizeof(mem_pool_t)));
230 
231  /* We do not set the memory to zero (FALSE) in the pool,
232  but only when allocated at a higher level in mem0mem.c.
233  This is to avoid masking useful Purify warnings. */
234 
235  pool->buf = static_cast<unsigned char *>(ut_malloc_low(size, FALSE, TRUE));
236  pool->size = size;
237 
238  mutex_create(mem_pool_mutex_key, &pool->mutex, SYNC_MEM_POOL);
239 
240  /* Initialize the free lists */
241 
242  for (i = 0; i < 64; i++) {
243 
244  UT_LIST_INIT(pool->free_list[i]);
245  }
246 
247  used = 0;
248 
249  while (size - used >= MEM_AREA_MIN_SIZE) {
250 
251  i = ut_2_log(size - used);
252 
253  if (ut_2_exp(i) > size - used) {
254 
255  /* ut_2_log rounds upward */
256 
257  i--;
258  }
259 
260  area = (mem_area_t*)(pool->buf + used);
261 
262  mem_area_set_size(area, ut_2_exp(i));
263  mem_area_set_free(area, TRUE);
264  UNIV_MEM_FREE(MEM_AREA_EXTRA_SIZE + (byte*) area,
266 
267  UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
268 
269  used = used + ut_2_exp(i);
270  }
271 
272  ut_ad(size >= used);
273 
274  pool->reserved = 0;
275 
276  return(pool);
277 }
278 
279 /********************************************************************/
281 UNIV_INTERN
282 void
284 /*==========*/
285  mem_pool_t* pool)
286 {
287  ut_free(pool->buf);
288  ut_free(pool);
289 }
290 
291 /********************************************************************/
294 static
295 ibool
296 mem_pool_fill_free_list(
297 /*====================*/
298  ulint i,
299  mem_pool_t* pool)
300 {
301  mem_area_t* area;
302  mem_area_t* area2;
303  ibool ret;
304 
305  ut_ad(mutex_own(&(pool->mutex)));
306 
307  if (UNIV_UNLIKELY(i >= 63)) {
308  /* We come here when we have run out of space in the
309  memory pool: */
310 
311  return(FALSE);
312  }
313 
314  area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
315 
316  if (area == NULL) {
317  if (UT_LIST_GET_LEN(pool->free_list[i + 1]) > 0) {
318  ut_print_timestamp(stderr);
319 
320  fprintf(stderr,
321  " InnoDB: Error: mem pool free list %lu"
322  " length is %lu\n"
323  "InnoDB: though the list is empty!\n",
324  (ulong) i + 1,
325  (ulong)
326  UT_LIST_GET_LEN(pool->free_list[i + 1]));
327  }
328 
329  ret = mem_pool_fill_free_list(i + 1, pool);
330 
331  if (ret == FALSE) {
332 
333  return(FALSE);
334  }
335 
336  area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
337  }
338 
339  if (UNIV_UNLIKELY(UT_LIST_GET_LEN(pool->free_list[i + 1]) == 0)) {
341 
342  ut_error;
343  }
344 
345  UT_LIST_REMOVE(free_list, pool->free_list[i + 1], area);
346 
347  area2 = (mem_area_t*)(((byte*)area) + ut_2_exp(i));
348  UNIV_MEM_ALLOC(area2, MEM_AREA_EXTRA_SIZE);
349 
350  mem_area_set_size(area2, ut_2_exp(i));
351  mem_area_set_free(area2, TRUE);
352 
353  UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area2);
354 
355  mem_area_set_size(area, ut_2_exp(i));
356 
357  UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
358 
359  return(TRUE);
360 }
361 
362 /********************************************************************/
366 UNIV_INTERN
367 void*
369 /*===========*/
370  ulint* psize,
375  mem_pool_t* pool)
376 {
377  mem_area_t* area;
378  ulint size;
379  ulint n;
380  ibool ret;
381 
382  /* If we are using os allocator just make a simple call
383  to malloc */
384  if (UNIV_LIKELY(srv_use_sys_malloc)) {
385  return(malloc(*psize));
386  }
387 
388  size = *psize;
389  n = ut_2_log(ut_max(size + MEM_AREA_EXTRA_SIZE, MEM_AREA_MIN_SIZE));
390 
391  mutex_enter(&(pool->mutex));
392  mem_n_threads_inside++;
393 
394  ut_a(mem_n_threads_inside == 1);
395 
396  area = UT_LIST_GET_FIRST(pool->free_list[n]);
397 
398  if (area == NULL) {
399  ret = mem_pool_fill_free_list(n, pool);
400 
401  if (ret == FALSE) {
402  /* Out of memory in memory pool: we try to allocate
403  from the operating system with the regular malloc: */
404 
405  mem_n_threads_inside--;
406  mutex_exit(&(pool->mutex));
407 
408  return(ut_malloc(size));
409  }
410 
411  area = UT_LIST_GET_FIRST(pool->free_list[n]);
412  }
413 
414  if (!mem_area_get_free(area)) {
415  fprintf(stderr,
416  "InnoDB: Error: Removing element from mem pool"
417  " free list %lu though the\n"
418  "InnoDB: element is not marked free!\n",
419  (ulong) n);
420 
422 
423  /* Try to analyze a strange assertion failure reported at
424  mysql@lists.mysql.com where the free bit IS 1 in the
425  hex dump above */
426 
427  if (mem_area_get_free(area)) {
428  fprintf(stderr,
429  "InnoDB: Probably a race condition"
430  " because now the area is marked free!\n");
431  }
432 
433  ut_error;
434  }
435 
436  if (UT_LIST_GET_LEN(pool->free_list[n]) == 0) {
437  fprintf(stderr,
438  "InnoDB: Error: Removing element from mem pool"
439  " free list %lu\n"
440  "InnoDB: though the list length is 0!\n",
441  (ulong) n);
443 
444  ut_error;
445  }
446 
447  ut_ad(mem_area_get_size(area) == ut_2_exp(n));
448 
449  mem_area_set_free(area, FALSE);
450 
451  UT_LIST_REMOVE(free_list, pool->free_list[n], area);
452 
453  pool->reserved += mem_area_get_size(area);
454 
455  mem_n_threads_inside--;
456  mutex_exit(&(pool->mutex));
457 
458  ut_ad(mem_pool_validate(pool));
459 
460  *psize = ut_2_exp(n) - MEM_AREA_EXTRA_SIZE;
461  UNIV_MEM_ALLOC(MEM_AREA_EXTRA_SIZE + (byte*)area, *psize);
462 
463  return((void*)(MEM_AREA_EXTRA_SIZE + ((byte*)area)));
464 }
465 
466 /********************************************************************/
469 UNIV_INLINE
470 mem_area_t*
471 mem_area_get_buddy(
472 /*===============*/
473  mem_area_t* area,
474  ulint size,
475  mem_pool_t* pool)
476 {
477  mem_area_t* buddy;
478 
479  ut_ad(size != 0);
480 
481  if (((((byte*)area) - pool->buf) % (2 * size)) == 0) {
482 
483  /* The buddy is in a higher address */
484 
485  buddy = (mem_area_t*)(((byte*)area) + size);
486 
487  if ((((byte*)buddy) - pool->buf) + size > pool->size) {
488 
489  /* The buddy is not wholly contained in the pool:
490  there is no buddy */
491 
492  buddy = NULL;
493  }
494  } else {
495  /* The buddy is in a lower address; NOTE that area cannot
496  be at the pool lower end, because then we would end up to
497  the upper branch in this if-clause: the remainder would be
498  0 */
499 
500  buddy = (mem_area_t*)(((byte*)area) - size);
501  }
502 
503  return(buddy);
504 }
505 
506 /********************************************************************/
508 UNIV_INTERN
509 void
511 /*==========*/
512  void* ptr,
514  mem_pool_t* pool)
515 {
516  mem_area_t* area;
517  mem_area_t* buddy;
518  void* new_ptr;
519  ulint size;
520  ulint n;
521 
522  if (UNIV_LIKELY(srv_use_sys_malloc)) {
523  free(ptr);
524 
525  return;
526  }
527 
528  /* It may be that the area was really allocated from the OS with
529  regular malloc: check if ptr points within our memory pool */
530 
531  if ((byte*)ptr < pool->buf || (byte*)ptr >= pool->buf + pool->size) {
532  ut_free(ptr);
533 
534  return;
535  }
536 
537  area = (mem_area_t*) (((byte*)ptr) - MEM_AREA_EXTRA_SIZE);
538 
539  if (mem_area_get_free(area)) {
540  fprintf(stderr,
541  "InnoDB: Error: Freeing element to mem pool"
542  " free list though the\n"
543  "InnoDB: element is marked free!\n");
544 
546  ut_error;
547  }
548 
549  size = mem_area_get_size(area);
550  UNIV_MEM_FREE(ptr, size - MEM_AREA_EXTRA_SIZE);
551 
552  if (size == 0) {
553  fprintf(stderr,
554  "InnoDB: Error: Mem area size is 0. Possibly a"
555  " memory overrun of the\n"
556  "InnoDB: previous allocated area!\n");
557 
559  ut_error;
560  }
561 
562 #ifdef UNIV_LIGHT_MEM_DEBUG
563  if (((byte*)area) + size < pool->buf + pool->size) {
564 
565  ulint next_size;
566 
567  next_size = mem_area_get_size(
568  (mem_area_t*)(((byte*)area) + size));
569  if (UNIV_UNLIKELY(!next_size || !ut_is_2pow(next_size))) {
570  fprintf(stderr,
571  "InnoDB: Error: Memory area size %lu,"
572  " next area size %lu not a power of 2!\n"
573  "InnoDB: Possibly a memory overrun of"
574  " the buffer being freed here.\n",
575  (ulong) size, (ulong) next_size);
577 
578  ut_error;
579  }
580  }
581 #endif
582  buddy = mem_area_get_buddy(area, size, pool);
583 
584  n = ut_2_log(size);
585 
586  mem_pool_mutex_enter(pool);
587  mem_n_threads_inside++;
588 
589  ut_a(mem_n_threads_inside == 1);
590 
591  if (buddy && mem_area_get_free(buddy)
592  && (size == mem_area_get_size(buddy))) {
593 
594  /* The buddy is in a free list */
595 
596  if ((byte*)buddy < (byte*)area) {
597  new_ptr = ((byte*)buddy) + MEM_AREA_EXTRA_SIZE;
598 
599  mem_area_set_size(buddy, 2 * size);
600  mem_area_set_free(buddy, FALSE);
601  } else {
602  new_ptr = ptr;
603 
604  mem_area_set_size(area, 2 * size);
605  }
606 
607  /* Remove the buddy from its free list and merge it to area */
608 
609  UT_LIST_REMOVE(free_list, pool->free_list[n], buddy);
610 
611  pool->reserved += ut_2_exp(n);
612 
613  mem_n_threads_inside--;
614  mem_pool_mutex_exit(pool);
615 
616  mem_area_free(new_ptr, pool);
617 
618  return;
619  } else {
620  UT_LIST_ADD_FIRST(free_list, pool->free_list[n], area);
621 
622  mem_area_set_free(area, TRUE);
623 
624  ut_ad(pool->reserved >= size);
625 
626  pool->reserved -= size;
627  }
628 
629  mem_n_threads_inside--;
630  mem_pool_mutex_exit(pool);
631 
632  ut_ad(mem_pool_validate(pool));
633 }
634 
635 /********************************************************************/
638 UNIV_INTERN
639 ibool
641 /*==============*/
642  mem_pool_t* pool)
643 {
644  mem_area_t* area;
645  mem_area_t* buddy;
646  ulint free;
647  ulint i;
648 
649  mem_pool_mutex_enter(pool);
650 
651  free = 0;
652 
653  for (i = 0; i < 64; i++) {
654 
656  (void) 0);
657 
658  area = UT_LIST_GET_FIRST(pool->free_list[i]);
659 
660  while (area != NULL) {
661  ut_a(mem_area_get_free(area));
662  ut_a(mem_area_get_size(area) == ut_2_exp(i));
663 
664  buddy = mem_area_get_buddy(area, ut_2_exp(i), pool);
665 
666  ut_a(!buddy || !mem_area_get_free(buddy)
667  || (ut_2_exp(i) != mem_area_get_size(buddy)));
668 
669  area = UT_LIST_GET_NEXT(free_list, area);
670 
671  free += ut_2_exp(i);
672  }
673  }
674 
675  ut_a(free + pool->reserved == pool->size);
676 
677  mem_pool_mutex_exit(pool);
678 
679  return(TRUE);
680 }
681 
682 /********************************************************************/
684 UNIV_INTERN
685 void
687 /*================*/
688  FILE* outfile,
689  mem_pool_t* pool)
690 {
691  ulint i;
692 
693  mem_pool_validate(pool);
694 
695  fprintf(outfile, "INFO OF A MEMORY POOL\n");
696 
697  mutex_enter(&(pool->mutex));
698 
699  for (i = 0; i < 64; i++) {
700  if (UT_LIST_GET_LEN(pool->free_list[i]) > 0) {
701 
702  fprintf(outfile,
703  "Free list length %lu for"
704  " blocks of size %lu\n",
705  (ulong) UT_LIST_GET_LEN(pool->free_list[i]),
706  (ulong) ut_2_exp(i));
707  }
708  }
709 
710  fprintf(outfile, "Pool size %lu, reserved %lu.\n", (ulong) pool->size,
711  (ulong) pool->reserved);
712  mutex_exit(&(pool->mutex));
713 }
714 
715 /********************************************************************/
718 UNIV_INTERN
719 ulint
721 /*==================*/
722  mem_pool_t* pool)
723 {
724  ulint reserved;
725 
726  mutex_enter(&(pool->mutex));
727 
728  reserved = pool->reserved;
729 
730  mutex_exit(&(pool->mutex));
731 
732  return(reserved);
733 }