49 #ifndef UNIV_HOTBACKUP
55 # include <sys/types.h>
56 # include <sys/stat.h>
60 #if defined(LINUX_NATIVE_AIO)
71 = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
/** When TRUE, the write paths skip the os_file_flush() call that they
otherwise make after a write in UNIV_DO_FLUSH builds. Defaults to FALSE. */
80 UNIV_INTERN ibool os_do_not_call_flush_at_each_write = FALSE;
85 #ifndef UNIV_HOTBACKUP
/** Number of entries in os_file_seek_mutexes[]. The seek-based read and
write paths select an entry with ((ulint) file) % OS_FILE_N_SEEK_MUTEXES
before repositioning the file. */
88 #define OS_FILE_N_SEEK_MUTEXES 16
/** Array of OS_FILE_N_SEEK_MUTEXES mutexes, indexed by the value computed
from the file handle as described above.
NOTE(review): presumably these serialize the seek + read/write pair on a
file handle; confirm where the selected entry is acquired and released. */
89 UNIV_INTERN
os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
/* NOTE(review): presumably the upper bound on consecutive requests that
the simulated AIO handler merges into one I/O -- confirm at the use site. */
92 #define OS_AIO_MERGE_N_CONSECUTIVE 64
/* NOTE(review): these look like Performance Schema instrumentation keys
for data files, log files and temporary files respectively -- confirm
against the code that registers them. */
153 UNIV_INTERN mysql_pfs_key_t innodb_file_data_key;
154 UNIV_INTERN mysql_pfs_key_t innodb_file_log_key;
155 UNIV_INTERN mysql_pfs_key_t innodb_file_temp_key;
192 #elif defined(LINUX_NATIVE_AIO)
239 #if defined(LINUX_NATIVE_AIO)
240 io_context_t* aio_ctx;
244 struct io_event* aio_events;
252 #if defined(LINUX_NATIVE_AIO)
/* NOTE(review): looks like the wait timeout used when reaping Linux
native AIO completion events; the unit (nanoseconds?) should be confirmed
at the use site. */
254 #define OS_AIO_REAP_TIMEOUT (500000000UL)
/* NOTE(review): presumably the pause between io_setup() retries; the unit
(microseconds?) should be confirmed at the use site. */
257 #define OS_AIO_IO_SETUP_RETRY_SLEEP (500000UL)
/** Number of io_setup() attempts made while it keeps failing with EAGAIN
before the failure is reported as an error. */
260 #define OS_AIO_IO_SETUP_RETRY_ATTEMPTS 5
/** Array of events with one entry per global AIO segment. It is allocated
during AIO initialisation, indexed by global segment number when a
simulated-AIO handler thread is woken, and released at AIO shutdown. */
264 static os_event_t* os_aio_segment_wait_events = NULL;
/** Total number of AIO segments. ULINT_UNDEFINED until AIO initialisation
stores the real value; reset to 0 when the AIO arrays are freed. */
276 static ulint os_aio_n_segments = ULINT_UNDEFINED;
/** Set to TRUE or FALSE by the simulated-AIO code only (both writers return
early when native AIO is in use). It is cleared right before the handler
threads of all segments are woken.
NOTE(review): presumably tells read handler threads to sleep briefly so
that more requests can be batched -- confirm in the handler loop. */
280 static ibool os_aio_recommend_sleep_for_read_threads = FALSE;
/* I/O statistics counters.
NOTE(review): the os_n_* counters are presumably incremented by the read,
write and fsync paths, and the *_old variants presumably hold the values
captured at the previous statistics printout -- confirm at the use sites. */
283 UNIV_INTERN ulint os_n_file_reads = 0;
/** Running total of bytes requested by the read paths; each read adds its
byte count n to this variable. */
284 UNIV_INTERN ulint os_bytes_read_since_printout = 0;
285 UNIV_INTERN ulint os_n_file_writes = 0;
286 UNIV_INTERN ulint os_n_fsyncs = 0;
287 UNIV_INTERN ulint os_n_file_reads_old = 0;
288 UNIV_INTERN ulint os_n_file_writes_old = 0;
289 UNIV_INTERN ulint os_n_fsyncs_old = 0;
/** Timestamp; initialised with time(NULL) during AIO setup. */
290 UNIV_INTERN time_t os_last_printout;
/** Set to TRUE once a disk-full or failed-write diagnostic has been
printed; the error paths test it before printing so that the message is
not repeated. */
292 UNIV_INTERN ibool os_has_said_disk_full = FALSE;
294 #ifndef UNIV_HOTBACKUP
317 OSVERSIONINFO os_info;
319 os_info.dwOSVersionInfoSize =
sizeof(OSVERSIONINFO);
321 ut_a(GetVersionEx(&os_info));
323 if (os_info.dwPlatformId == VER_PLATFORM_WIN32s) {
325 }
else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
327 }
else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
328 switch (os_info.dwMajorVersion) {
333 return (os_info.dwMinorVersion == 0) ?
OS_WIN2000
362 ibool report_all_errors)
369 err = (ulint) GetLastError();
371 if (report_all_errors
372 || (err != ERROR_DISK_FULL && err != ERROR_FILE_EXISTS)) {
376 " InnoDB: Operating system error number %lu"
377 " in a file operation.\n", (ulong) err);
379 if (err == ERROR_PATH_NOT_FOUND) {
381 "InnoDB: The error means the system"
382 " cannot find the path specified.\n");
386 "InnoDB: If you are installing InnoDB,"
387 " remember that you must create\n"
388 "InnoDB: directories yourself, InnoDB"
389 " does not create them.\n");
391 }
else if (err == ERROR_ACCESS_DENIED) {
393 "InnoDB: The error means mysqld does not have"
394 " the access rights to\n"
395 "InnoDB: the directory. It may also be"
396 " you have created a subdirectory\n"
397 "InnoDB: of the same name as a data file.\n");
398 }
else if (err == ERROR_SHARING_VIOLATION
399 || err == ERROR_LOCK_VIOLATION) {
401 "InnoDB: The error means that another program"
402 " is using InnoDB's files.\n"
403 "InnoDB: This might be a backup or antivirus"
404 " software or another instance\n"
406 " Please close it to get rid of this error.\n");
407 }
else if (err == ERROR_WORKING_SET_QUOTA
408 || err == ERROR_NO_SYSTEM_RESOURCES) {
410 "InnoDB: The error means that there are no"
411 " sufficient system resources or quota to"
412 " complete the operation.\n");
413 }
else if (err == ERROR_OPERATION_ABORTED) {
415 "InnoDB: The error means that the I/O"
416 " operation has been aborted\n"
417 "InnoDB: because of either a thread exit"
418 " or an application request.\n"
419 "InnoDB: Retry attempt is made.\n");
422 "InnoDB: Some operating system error numbers"
423 " are described at\n"
426 "operating-system-error-codes.html\n");
432 if (err == ERROR_FILE_NOT_FOUND) {
434 }
else if (err == ERROR_DISK_FULL) {
435 return(OS_FILE_DISK_FULL);
436 }
else if (err == ERROR_FILE_EXISTS) {
437 return(OS_FILE_ALREADY_EXISTS);
438 }
else if (err == ERROR_SHARING_VIOLATION
439 || err == ERROR_LOCK_VIOLATION) {
440 return(OS_FILE_SHARING_VIOLATION);
441 }
else if (err == ERROR_WORKING_SET_QUOTA
442 || err == ERROR_NO_SYSTEM_RESOURCES) {
443 return(OS_FILE_INSUFFICIENT_RESOURCE);
444 }
else if (err == ERROR_OPERATION_ABORTED) {
445 return(OS_FILE_OPERATION_ABORTED);
452 if (report_all_errors
453 || (err != ENOSPC && err != EEXIST)) {
457 " InnoDB: Operating system error number %lu"
458 " in a file operation.\n", (ulong) err);
462 "InnoDB: The error means the system"
463 " cannot find the path specified.\n");
467 "InnoDB: If you are installing InnoDB,"
468 " remember that you must create\n"
469 "InnoDB: directories yourself, InnoDB"
470 " does not create them.\n");
472 }
else if (err == EACCES) {
474 "InnoDB: The error means mysqld does not have"
475 " the access rights to\n"
476 "InnoDB: the directory.\n");
478 if (strerror((
int)err) != NULL) {
480 "InnoDB: Error number %lu"
482 err, strerror((
int)err));
486 "InnoDB: Some operating system"
487 " error numbers are described at\n"
490 "operating-system-error-codes.html\n");
498 return(OS_FILE_DISK_FULL);
502 return(OS_FILE_ALREADY_EXISTS);
506 return(OS_FILE_PATH_ERROR);
508 if (srv_use_native_aio) {
509 return(OS_FILE_AIO_RESOURCES_RESERVED);
513 if (srv_use_native_aio) {
514 return(OS_FILE_AIO_INTERRUPTED);
529 os_file_handle_error_cond_exit(
532 const char* operation,
540 if (err == OS_FILE_DISK_FULL) {
543 if (os_has_said_disk_full) {
551 " InnoDB: Encountered a problem with"
557 " InnoDB: Disk is full. Try to clean the disk"
558 " to free space.\n");
560 os_has_said_disk_full = TRUE;
565 }
else if (err == OS_FILE_AIO_RESOURCES_RESERVED) {
568 }
else if (err == OS_FILE_AIO_INTERRUPTED) {
571 }
else if (err == OS_FILE_ALREADY_EXISTS
572 || err == OS_FILE_PATH_ERROR) {
575 }
else if (err == OS_FILE_SHARING_VIOLATION) {
579 }
else if (err == OS_FILE_INSUFFICIENT_RESOURCE) {
583 }
else if (err == OS_FILE_OPERATION_ABORTED) {
589 fprintf(stderr,
"InnoDB: File name %s\n", name);
592 fprintf(stderr,
"InnoDB: File operation call: '%s'.\n",
596 fprintf(stderr,
"InnoDB: Cannot continue operation.\n");
612 os_file_handle_error(
615 const char* operation)
618 return(os_file_handle_error_cond_exit(name, operation, TRUE));
626 os_file_handle_error_no_exit(
629 const char* operation)
632 return(os_file_handle_error_cond_exit(name, operation, FALSE));
636 #define USE_FILE_LOCK
637 #if defined(UNIV_HOTBACKUP) || defined(__WIN__)
641 # undef USE_FILE_LOCK
660 lk.l_whence = SEEK_SET;
661 lk.l_start = lk.l_len = 0;
662 if (fcntl(fd, F_SETLK, &lk) == -1) {
664 "InnoDB: Unable to lock %s, error: %d\n", name, errno);
666 if (errno == EAGAIN || errno == EACCES) {
668 "InnoDB: Check that you do not already have"
669 " another drizzled process\n"
670 "InnoDB: using the same InnoDB data"
681 #ifndef UNIV_HOTBACKUP
693 for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
711 file = fdopen(fd,
"w+b");
717 " InnoDB: Error: unable to create temporary file;"
718 " errno: %d\n", errno);
740 ibool error_is_fatal)
748 LPWIN32_FIND_DATA lpFindFileData;
749 char path[OS_FILE_MAX_PATH + 3];
751 ut_a(strlen(dirname) < OS_FILE_MAX_PATH);
753 strcpy(path, dirname);
754 strcpy(path + strlen(path),
"\\*");
760 lpFindFileData =
ut_malloc(
sizeof(WIN32_FIND_DATA));
762 dir = FindFirstFile((LPCTSTR) path, lpFindFileData);
766 if (dir == INVALID_HANDLE_VALUE) {
768 if (error_is_fatal) {
769 os_file_handle_error(dirname,
"opendir");
777 dir = opendir(dirname);
779 if (dir == NULL && error_is_fatal) {
780 os_file_handle_error(dirname,
"opendir");
799 ret = FindClose(dir);
802 os_file_handle_error_no_exit(NULL,
"closedir");
814 os_file_handle_error_no_exit(NULL,
"closedir");
834 LPWIN32_FIND_DATA lpFindFileData;
837 lpFindFileData =
ut_malloc(
sizeof(WIN32_FIND_DATA));
839 ret = FindNextFile(dir, lpFindFileData);
842 ut_a(strlen((
char *) lpFindFileData->cFileName)
845 if (strcmp((
char *) lpFindFileData->cFileName,
".") == 0
846 || strcmp((
char *) lpFindFileData->cFileName,
"..") == 0) {
851 strcpy(info->
name, (
char *) lpFindFileData->cFileName);
853 info->
size = (ib_int64_t)(lpFindFileData->nFileSizeLow)
854 + (((ib_int64_t)(lpFindFileData->nFileSizeHigh))
857 if (lpFindFileData->dwFileAttributes
858 & FILE_ATTRIBUTE_REPARSE_POINT) {
864 info->
type = OS_FILE_TYPE_LINK;
865 }
else if (lpFindFileData->dwFileAttributes
866 & FILE_ATTRIBUTE_DIRECTORY) {
867 info->
type = OS_FILE_TYPE_DIR;
873 info->
type = OS_FILE_TYPE_FILE;
881 }
else if (GetLastError() == ERROR_NO_MORE_FILES) {
885 os_file_handle_error_no_exit(dirname,
886 "readdir_next_file");
893 struct stat statinfo;
894 #ifdef HAVE_READDIR_R
895 char dirent_buf[
sizeof(
struct dirent)
896 + _POSIX_PATH_MAX + 100];
904 #ifdef HAVE_READDIR_R
905 ret = readdir_r(dir, (
struct dirent*)dirent_buf, &ent);
917 "InnoDB: cannot read directory %s, error %lu\n",
918 dirname, (ulong)ret);
929 ut_a(strlen(ent->d_name) < _POSIX_PATH_MAX + 100 - 1);
938 ut_a(strlen(ent->d_name) < OS_FILE_MAX_PATH);
940 if (strcmp(ent->d_name,
".") == 0 || strcmp(ent->d_name,
"..") == 0) {
945 strcpy(info->
name, ent->d_name);
947 full_path =
static_cast<char*
>(
ut_malloc(strlen(dirname) + strlen(ent->d_name) + 10));
949 sprintf(full_path,
"%s/%s", dirname, ent->d_name);
951 ret = stat(full_path, &statinfo);
955 if (errno == ENOENT) {
971 os_file_handle_error_no_exit(full_path,
"stat");
978 info->
size = (ib_int64_t)statinfo.st_size;
980 if (S_ISDIR(statinfo.st_mode)) {
981 info->
type = OS_FILE_TYPE_DIR;
982 }
else if (S_ISLNK(statinfo.st_mode)) {
983 info->
type = OS_FILE_TYPE_LINK;
984 }
else if (S_ISREG(statinfo.st_mode)) {
985 info->
type = OS_FILE_TYPE_FILE;
987 info->
type = OS_FILE_TYPE_UNKNOWN;
1006 const char* pathname,
1008 ibool fail_if_exists)
1014 rcode = CreateDirectory((LPCTSTR) pathname, NULL);
1016 || (GetLastError() == ERROR_ALREADY_EXISTS
1017 && !fail_if_exists))) {
1019 os_file_handle_error(pathname,
"CreateDirectory");
1028 rcode = mkdir(pathname, 0770);
1030 if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) {
1032 os_file_handle_error(pathname,
"mkdir");
1068 DWORD attributes = 0;
1075 create_flag = OPEN_EXISTING;
1076 }
else if (create_mode == OS_FILE_CREATE) {
1077 create_flag = CREATE_NEW;
1078 }
else if (create_mode == OS_FILE_CREATE_PATH) {
1084 create_flag = CREATE_NEW;
1085 create_mode = OS_FILE_CREATE;
1091 if (access_type == OS_FILE_READ_ONLY) {
1092 access = GENERIC_READ;
1093 }
else if (access_type == OS_FILE_READ_WRITE) {
1094 access = GENERIC_READ | GENERIC_WRITE;
1100 file = CreateFile((LPCTSTR) name,
1102 FILE_SHARE_READ | FILE_SHARE_WRITE,
1110 if (file == INVALID_HANDLE_VALUE) {
1113 retry = os_file_handle_error(name,
1133 if (access_type == OS_FILE_READ_ONLY) {
1134 create_flag = O_RDONLY;
1136 create_flag = O_RDWR;
1138 }
else if (create_mode == OS_FILE_CREATE) {
1139 create_flag = O_RDWR | O_CREAT | O_EXCL;
1140 }
else if (create_mode == OS_FILE_CREATE_PATH) {
1146 create_flag = O_RDWR | O_CREAT | O_EXCL;
1147 create_mode = OS_FILE_CREATE;
1153 if (create_mode == OS_FILE_CREATE) {
1154 file = open(name, create_flag, S_IRUSR | S_IWUSR
1155 | S_IRGRP | S_IWGRP);
1157 file = open(name, create_flag);
1163 retry = os_file_handle_error(name,
1169 #ifdef USE_FILE_LOCK
1170 }
else if (access_type == OS_FILE_READ_WRITE
1171 && os_file_lock(file, name)) {
1210 DWORD attributes = 0;
1211 DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE;
1216 create_flag = OPEN_EXISTING;
1217 }
else if (create_mode == OS_FILE_CREATE) {
1218 create_flag = CREATE_NEW;
1224 if (access_type == OS_FILE_READ_ONLY) {
1225 access = GENERIC_READ;
1226 }
else if (access_type == OS_FILE_READ_WRITE) {
1227 access = GENERIC_READ | GENERIC_WRITE;
1228 }
else if (access_type == OS_FILE_READ_ALLOW_DELETE) {
1229 access = GENERIC_READ;
1230 share_mode = FILE_SHARE_DELETE | FILE_SHARE_READ
1240 file = CreateFile((LPCTSTR) name,
1248 if (file == INVALID_HANDLE_VALUE) {
1262 if (access_type == OS_FILE_READ_ONLY) {
1263 create_flag = O_RDONLY;
1265 create_flag = O_RDWR;
1267 }
else if (create_mode == OS_FILE_CREATE) {
1268 create_flag = O_RDWR | O_CREAT | O_EXCL;
1274 if (create_mode == OS_FILE_CREATE) {
1275 file = open(name, create_flag, S_IRUSR | S_IWUSR
1276 | S_IRGRP | S_IWGRP);
1278 file = open(name, create_flag);
1283 #ifdef USE_FILE_LOCK
1284 }
else if (access_type == OS_FILE_READ_WRITE
1285 && os_file_lock(file, name)) {
1305 const char* file_name,
1306 const char* operation_name)
1311 #if defined(UNIV_SOLARIS) && defined(DIRECTIO_ON)
1312 if (directio(fd, DIRECTIO_ON) == -1) {
1314 errno_save = (int)errno;
1317 " InnoDB: Failed to set DIRECTIO_ON "
1318 "on file %s: %s: %s, continuing anyway\n",
1319 file_name, operation_name, strerror(errno_save));
1321 #elif defined(O_DIRECT)
1322 if (fcntl(fd, F_SETFL, O_DIRECT) == -1) {
1324 errno_save = (int)errno;
1327 " InnoDB: Failed to set O_DIRECT "
1328 "on file %s: %s: %s, continuing anyway\n",
1329 file_name, operation_name, strerror(errno_save));
1330 if (errno_save == EINVAL) {
1333 " InnoDB: O_DIRECT is known to result in "
1334 "'Invalid argument' on Linux on tmpfs, "
1335 "see MySQL Bug#26662\n");
1341 (void)operation_name;
1377 DWORD share_mode = FILE_SHARE_READ;
1384 if (create_mode == OS_FILE_OPEN_RAW) {
1385 create_flag = OPEN_EXISTING;
1386 share_mode = FILE_SHARE_WRITE;
1388 || create_mode == OS_FILE_OPEN_RETRY) {
1389 create_flag = OPEN_EXISTING;
1390 }
else if (create_mode == OS_FILE_CREATE) {
1391 create_flag = CREATE_NEW;
1392 }
else if (create_mode == OS_FILE_OVERWRITE) {
1393 create_flag = CREATE_ALWAYS;
1399 if (purpose == OS_FILE_AIO) {
1404 if (srv_use_native_aio) {
1405 attributes = attributes | FILE_FLAG_OVERLAPPED;
1408 #ifdef UNIV_NON_BUFFERED_IO
1409 # ifndef UNIV_HOTBACKUP
1410 if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
1414 }
else if (srv_win_file_flush_method
1416 attributes = attributes | FILE_FLAG_NO_BUFFERING;
1419 attributes = attributes | FILE_FLAG_NO_BUFFERING;
1422 }
else if (purpose == OS_FILE_NORMAL) {
1424 #ifdef UNIV_NON_BUFFERED_IO
1425 # ifndef UNIV_HOTBACKUP
1426 if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
1430 }
else if (srv_win_file_flush_method
1432 attributes = attributes | FILE_FLAG_NO_BUFFERING;
1435 attributes = attributes | FILE_FLAG_NO_BUFFERING;
1443 file = CreateFile((LPCTSTR) name,
1444 GENERIC_READ | GENERIC_WRITE,
1462 if (file == INVALID_HANDLE_VALUE) {
1473 retry = os_file_handle_error_no_exit(name,
1474 create_mode == OS_FILE_CREATE ?
1477 retry = os_file_handle_error(name,
1478 create_mode == OS_FILE_CREATE ?
1494 const char* mode_str = NULL;
1499 if (create_mode ==
OS_FILE_OPEN || create_mode == OS_FILE_OPEN_RAW
1500 || create_mode == OS_FILE_OPEN_RETRY) {
1503 create_flag = O_RDONLY;
1505 create_flag = O_RDWR;
1506 }
else if (create_mode == OS_FILE_CREATE) {
1507 mode_str =
"CREATE";
1508 create_flag = O_RDWR | O_CREAT | O_EXCL;
1509 }
else if (create_mode == OS_FILE_OVERWRITE) {
1510 mode_str =
"OVERWRITE";
1511 create_flag = O_RDWR | O_CREAT | O_TRUNC;
1518 ut_a(purpose == OS_FILE_AIO || purpose == OS_FILE_NORMAL);
1524 if (type == OS_LOG_FILE
1528 fprintf(stderr,
"Using O_SYNC for file %s\n", name);
1531 create_flag = create_flag | O_SYNC;
1548 retry = os_file_handle_error_no_exit(name,
1549 create_mode == OS_FILE_CREATE ?
1552 retry = os_file_handle_error(name,
1553 create_mode == OS_FILE_CREATE ?
1568 if (type != OS_LOG_FILE
1579 #ifdef USE_FILE_LOCK
1580 if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) {
1582 if (create_mode == OS_FILE_OPEN_RETRY) {
1585 fputs(
" InnoDB: Retrying to lock"
1586 " the first data file\n",
1588 for (i = 0; i < 100; i++) {
1590 if (!os_file_lock(file, name)) {
1596 fputs(
" InnoDB: Unable to open the first data file\n",
1626 ret = DeleteFile((LPCTSTR)name);
1632 if (GetLastError() == ERROR_FILE_NOT_FOUND) {
1640 if (count > 100 && 0 == (count % 10)) {
1642 "InnoDB: Warning: cannot delete file %s\n"
1643 "InnoDB: Are you running ibbackup"
1644 " to back up the file?\n", name);
1662 if (ret != 0 && errno != ENOENT) {
1663 os_file_handle_error_no_exit(name,
"delete");
1688 ret = DeleteFile((LPCTSTR)name);
1694 if (GetLastError() == ERROR_FILE_NOT_FOUND) {
1703 if (count > 100 && 0 == (count % 10)) {
1705 "InnoDB: Warning: cannot delete file %s\n"
1706 "InnoDB: Are you running ibbackup"
1707 " to back up the file?\n", name);
1726 os_file_handle_error_no_exit(name,
"delete");
1744 const char* oldpath,
1746 const char* newpath)
1751 ret = MoveFile((LPCTSTR)oldpath, (LPCTSTR)newpath);
1757 os_file_handle_error_no_exit(oldpath,
"rename");
1763 ret = rename(oldpath, newpath);
1766 os_file_handle_error_no_exit(oldpath,
"rename");
1791 ret = CloseHandle(file);
1797 os_file_handle_error(NULL,
"close");
1806 os_file_handle_error(NULL,
"close");
1815 #ifdef UNIV_HOTBACKUP
1821 os_file_close_no_error_handling(
1830 ret = CloseHandle(file);
1868 low = GetFileSize(file, &high);
1870 if ((low == 0xFFFFFFFF) && (GetLastError() != NO_ERROR)) {
1881 offs = lseek(file, 0, SEEK_END);
1883 if (offs == ((off_t)-1)) {
1888 if (
sizeof(off_t) > 4) {
1889 *size = (ulint)(offs & 0xFFFFFFFFUL);
1890 *size_high = (ulint)(offs >> 32);
1892 *size = (ulint) offs;
1920 return((((ib_int64_t)size_high) << 32) + (ib_int64_t)size);
1937 ib_int64_t current_size;
1938 ib_int64_t desired_size;
1944 ut_a(size == (size & 0xFFFFFFFF));
1947 desired_size = (ib_int64_t)size + (((ib_int64_t)size_high) << 32);
1950 buf_size =
ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE))
1952 buf2 =
static_cast<unsigned char *
>(
ut_malloc(buf_size + UNIV_PAGE_SIZE));
1955 buf =
static_cast<unsigned char *
>(
ut_align(buf2, UNIV_PAGE_SIZE));
1958 memset(buf, 0, buf_size);
1960 if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
1962 fprintf(stderr,
"InnoDB: Progress in MB:");
1965 while (current_size < desired_size) {
1968 if (desired_size - current_size < (ib_int64_t) buf_size) {
1969 n_bytes = (ulint) (desired_size - current_size);
1974 ret = os_file_write(name, file, buf,
1975 (ulint)(current_size & 0xFFFFFFFF),
1976 (ulint)(current_size >> 32),
1980 goto error_handling;
1984 if ((ib_int64_t) (current_size + n_bytes) / (ib_int64_t)(100 * 1024 * 1024)
1985 != current_size / (ib_int64_t)(100 * 1024 * 1024)) {
1987 fprintf(stderr,
" %lu00",
1988 (ulong) ((current_size + n_bytes)
1989 / (ib_int64_t)(100 * 1024 * 1024)));
1992 current_size += n_bytes;
1995 if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
1997 fprintf(stderr,
"\n");
2002 ret = os_file_flush(file);
2022 HANDLE h = (HANDLE) _get_osfhandle(fileno(file));
2023 return(SetEndOfFile(h));
2025 return(!ftruncate(fileno(file), ftell(file)));
2053 if (ret == -1 && errno == ENOLCK) {
2055 if (failures % 100 == 0) {
2059 " InnoDB: fsync(): "
2060 "No locks available; retrying\n");
2095 ret = FlushFileBuffers(file);
2106 == ERROR_INVALID_FUNCTION) {
2110 os_file_handle_error(NULL,
"flush");
2120 #if defined(HAVE_DARWIN_THREADS)
2121 # ifndef F_FULLFSYNC
2123 # define F_FULLFSYNC 51
2124 # elif F_FULLFSYNC != 51
2125 # error "F_FULLFSYNC != 51: ABI incompatibility with Mac OS X 10.3"
2132 if (!srv_have_fullfsync) {
2136 ret = os_file_fsync(file);
2138 ret = fcntl(file, F_FULLFSYNC, NULL);
2143 ret = os_file_fsync(file);
2147 ret = os_file_fsync(file);
2165 " InnoDB: Error: the OS said file flush did not succeed\n");
2167 os_file_handle_error(NULL,
"flush");
2194 #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
2198 ut_a((offset & 0xFFFFFFFFUL) == offset);
2203 if (
sizeof(off_t) > 4) {
2204 offs = (off_t)offset + (((off_t)offset_high) << 32);
2207 offs = (off_t)offset;
2209 if (offset_high > 0) {
2211 "InnoDB: Error: file read at offset > 4 GB\n");
2217 #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
2223 n_bytes = pread(file, buf, (ssize_t)n, offs);
2235 #ifndef UNIV_HOTBACKUP
2243 #ifndef UNIV_HOTBACKUP
2245 i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
2250 ret_offset = lseek(file, offs, SEEK_SET);
2252 if (ret_offset < 0) {
2255 ret = read(file, buf, (ssize_t)n);
2258 #ifndef UNIV_HOTBACKUP
2289 ut_a((offset & 0xFFFFFFFFUL) == offset);
2294 if (
sizeof(off_t) > 4) {
2295 offs = (off_t)offset + (((off_t)offset_high) << 32);
2297 offs = (off_t)offset;
2299 if (offset_high > 0) {
2301 "InnoDB: Error: file write"
2302 " at offset > 4 GB\n");
2311 #if defined(HAVE_PWRITE) && !defined(HAVE_BROKEN_PREAD)
2317 ret = pwrite(file, buf, (ssize_t)n, offs);
2324 # ifdef UNIV_DO_FLUSH
2327 && !os_do_not_call_flush_at_each_write) {
2333 ut_a(TRUE == os_file_flush(file));
2341 # ifndef UNIV_HOTBACKUP
2349 # ifndef UNIV_HOTBACKUP
2351 i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
2356 ret_offset = lseek(file, offs, SEEK_SET);
2358 if (ret_offset < 0) {
2364 ret = write(file, buf, (ssize_t)n);
2366 # ifdef UNIV_DO_FLUSH
2369 && !os_do_not_call_flush_at_each_write) {
2375 ut_a(TRUE == os_file_flush(file));
2380 # ifndef UNIV_HOTBACKUP
2418 #ifndef UNIV_HOTBACKUP
2424 ut_a((offset & 0xFFFFFFFFUL) == offset);
2425 ut_a((n & 0xFFFFFFFFUL) == n);
2428 os_bytes_read_since_printout += n;
2435 low = (DWORD) offset;
2436 high = (DWORD) offset_high;
2442 #ifndef UNIV_HOTBACKUP
2444 i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
2449 ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
2451 if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
2453 #ifndef UNIV_HOTBACKUP
2461 goto error_handling;
2464 ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
2466 #ifndef UNIV_HOTBACKUP
2474 if (ret && len == n) {
2481 os_bytes_read_since_printout += n;
2484 ret = os_file_pread(file, buf, n, offset, offset_high);
2486 if ((ulint)ret == n) {
2492 "InnoDB: Error: tried to read %lu bytes at offset %lu %lu.\n"
2493 "InnoDB: Was only able to read %ld.\n",
2494 (ulong)n, (ulong)offset_high,
2495 (ulong)offset, (
long)ret);
2500 retry = os_file_handle_error(NULL,
"read");
2507 "InnoDB: Fatal error: cannot read from file."
2508 " OS error number %lu.\n",
2510 (ulong) GetLastError()
2547 #ifndef UNIV_HOTBACKUP
2553 ut_a((offset & 0xFFFFFFFFUL) == offset);
2554 ut_a((n & 0xFFFFFFFFUL) == n);
2557 os_bytes_read_since_printout += n;
2564 low = (DWORD) offset;
2565 high = (DWORD) offset_high;
2571 #ifndef UNIV_HOTBACKUP
2573 i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
2578 ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
2580 if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
2582 #ifndef UNIV_HOTBACKUP
2590 goto error_handling;
2593 ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
2595 #ifndef UNIV_HOTBACKUP
2603 if (ret && len == n) {
2610 os_bytes_read_since_printout += n;
2613 ret = os_file_pread(file, buf, n, offset, offset_high);
2615 if ((ulint)ret == n) {
2623 retry = os_file_handle_error_no_exit(NULL,
"read");
2651 flen = fread(str, 1, size - 1, file);
2680 ulint n_retries = 0;
2682 #ifndef UNIV_HOTBACKUP
2688 ut_a((offset & 0xFFFFFFFFUL) == offset);
2689 ut_a((n & 0xFFFFFFFFUL) == n);
2700 low = (DWORD) offset;
2701 high = (DWORD) offset_high;
2707 #ifndef UNIV_HOTBACKUP
2709 i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
2714 ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
2716 if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
2718 #ifndef UNIV_HOTBACKUP
2729 " InnoDB: Error: File pointer positioning to"
2730 " file %s failed at\n"
2731 "InnoDB: offset %lu %lu. Operating system"
2732 " error number %lu.\n"
2733 "InnoDB: Some operating system error numbers"
2734 " are described at\n"
2736 REFMAN
"operating-system-error-codes.html\n",
2737 name, (ulong) offset_high, (ulong) offset,
2738 (ulong) GetLastError());
2743 ret = WriteFile(file, buf, (DWORD) n, &len, NULL);
2748 # ifdef UNIV_DO_FLUSH
2749 if (!os_do_not_call_flush_at_each_write) {
2750 ut_a(TRUE == os_file_flush(file));
2754 #ifndef UNIV_HOTBACKUP
2762 if (ret && len == n) {
2771 if (GetLastError() == ERROR_LOCK_VIOLATION && n_retries < 100) {
2780 if (!os_has_said_disk_full) {
2782 err = (ulint)GetLastError();
2787 " InnoDB: Error: Write to file %s failed"
2788 " at offset %lu %lu.\n"
2789 "InnoDB: %lu bytes should have been written,"
2790 " only %lu were written.\n"
2791 "InnoDB: Operating system error number %lu.\n"
2792 "InnoDB: Check that your OS and file system"
2793 " support files of this size.\n"
2794 "InnoDB: Check also that the disk is not full"
2795 " or a disk quota exceeded.\n",
2796 name, (ulong) offset_high, (ulong) offset,
2797 (ulong) n, (ulong) len, (ulong) err);
2799 if (strerror((
int)err) != NULL) {
2801 "InnoDB: Error number %lu means '%s'.\n",
2802 (ulong) err, strerror((
int)err));
2806 "InnoDB: Some operating system error numbers"
2807 " are described at\n"
2809 REFMAN
"operating-system-error-codes.html\n");
2811 os_has_said_disk_full = TRUE;
2818 ret = os_file_pwrite(file, buf, n, offset, offset_high);
2820 if ((ulint)ret == n) {
2825 if (!os_has_said_disk_full) {
2830 " InnoDB: Error: Write to file %s failed"
2831 " at offset %lu %lu.\n"
2832 "InnoDB: %lu bytes should have been written,"
2833 " only %ld were written.\n"
2834 "InnoDB: Operating system error number %lu.\n"
2835 "InnoDB: Check that your OS and file system"
2836 " support files of this size.\n"
2837 "InnoDB: Check also that the disk is not full"
2838 " or a disk quota exceeded.\n",
2839 name, offset_high, offset, n, (
long int)ret,
2841 if (strerror(errno) != NULL) {
2843 "InnoDB: Error number %lu means '%s'.\n",
2844 (ulint)errno, strerror(errno));
2848 "InnoDB: Some operating system error numbers"
2849 " are described at\n"
2851 REFMAN
"operating-system-error-codes.html\n");
2853 os_has_said_disk_full = TRUE;
2869 os_file_type_t* type)
2873 struct _stat statinfo;
2875 ret = _stat(path, &statinfo);
2876 if (ret && (errno == ENOENT || errno == ENOTDIR)) {
2883 os_file_handle_error_no_exit(path,
"stat");
2888 if (_S_IFDIR & statinfo.st_mode) {
2889 *type = OS_FILE_TYPE_DIR;
2890 }
else if (_S_IFREG & statinfo.st_mode) {
2891 *type = OS_FILE_TYPE_FILE;
2893 *type = OS_FILE_TYPE_UNKNOWN;
2901 struct stat statinfo;
2903 ret = stat(path, &statinfo);
2904 if (ret && (errno == ENOENT || errno == ENOTDIR)) {
2911 os_file_handle_error_no_exit(path,
"stat");
2916 if (S_ISDIR(statinfo.st_mode)) {
2917 *type = OS_FILE_TYPE_DIR;
2918 }
else if (S_ISLNK(statinfo.st_mode)) {
2919 *type = OS_FILE_TYPE_LINK;
2920 }
else if (S_ISREG(statinfo.st_mode)) {
2921 *type = OS_FILE_TYPE_FILE;
2923 *type = OS_FILE_TYPE_UNKNOWN;
2945 struct _stat statinfo;
2947 ret = _stat(path, &statinfo);
2948 if (ret && (errno == ENOENT || errno == ENOTDIR)) {
2955 os_file_handle_error_no_exit(path,
"stat");
2959 if (_S_IFDIR & statinfo.st_mode) {
2960 stat_info->
type = OS_FILE_TYPE_DIR;
2961 }
else if (_S_IFREG & statinfo.st_mode) {
2962 stat_info->
type = OS_FILE_TYPE_FILE;
2964 stat_info->
type = OS_FILE_TYPE_UNKNOWN;
2967 stat_info->
ctime = statinfo.st_ctime;
2968 stat_info->
atime = statinfo.st_atime;
2969 stat_info->
mtime = statinfo.st_mtime;
2970 stat_info->
size = statinfo.st_size;
2975 struct stat statinfo;
2977 ret = stat(path, &statinfo);
2979 if (ret && (errno == ENOENT || errno == ENOTDIR)) {
2986 os_file_handle_error_no_exit(path,
"stat");
2991 if (S_ISDIR(statinfo.st_mode)) {
2992 stat_info->
type = OS_FILE_TYPE_DIR;
2993 }
else if (S_ISLNK(statinfo.st_mode)) {
2994 stat_info->
type = OS_FILE_TYPE_LINK;
2995 }
else if (S_ISREG(statinfo.st_mode)) {
2996 stat_info->
type = OS_FILE_TYPE_FILE;
2998 stat_info->
type = OS_FILE_TYPE_UNKNOWN;
3001 stat_info->
ctime = statinfo.st_ctime;
3002 stat_info->
atime = statinfo.st_atime;
3003 stat_info->
mtime = statinfo.st_mtime;
3004 stat_info->
size = statinfo.st_size;
3012 # define OS_FILE_PATH_SEPARATOR '\\'
3014 # define OS_FILE_PATH_SEPARATOR '/'
3052 const char* last_slash = strrchr(path, OS_FILE_PATH_SEPARATOR);
3061 if (last_slash == path) {
3082 ibool success, subdir_exists;
3083 os_file_type_t type;
3086 if (strlen(subdir) == 1
3087 && (*subdir == OS_FILE_PATH_SEPARATOR || *subdir ==
'.')) {
3096 if (success && !subdir_exists) {
3112 #ifndef UNIV_HOTBACKUP
3118 os_aio_array_get_nth_slot(
3123 ut_a(index < array->n_slots);
3125 return((array->
slots) + index);
3128 #if defined(LINUX_NATIVE_AIO)
3134 os_aio_linux_create_io_ctx(
3137 io_context_t* io_ctx)
3143 memset(io_ctx, 0x0,
sizeof(*io_ctx));
3148 ret = io_setup(max_events, io_ctx);
3150 #if defined(UNIV_AIO_DEBUG)
3152 "InnoDB: Linux native AIO:"
3153 " initialized io_ctx for segment\n");
3167 " InnoDB: Warning: io_setup() failed"
3168 " with EAGAIN. Will make %d attempts"
3169 " before giving up.\n",
3170 OS_AIO_IO_SETUP_RETRY_ATTEMPTS);
3173 if (retries < OS_AIO_IO_SETUP_RETRY_ATTEMPTS) {
3176 "InnoDB: Warning: io_setup() attempt"
3186 " InnoDB: Error: io_setup() failed"
3187 " with EAGAIN after %d attempts.\n",
3188 OS_AIO_IO_SETUP_RETRY_ATTEMPTS);
3194 " InnoDB: Error: Linux Native AIO interface"
3195 " is not supported on this platform. Please"
3196 " check your OS documentation and install"
3197 " appropriate binary of InnoDB.\n");
3204 " InnoDB: Error: Linux Native AIO setup"
3205 " returned following error[%d]\n", -ret);
3210 "InnoDB: You can disable Linux Native AIO by"
3211 " setting innodb_native_aio = off in my.cnf\n");
3223 os_aio_array_create(
3235 #elif defined(LINUX_NATIVE_AIO)
3236 struct io_event* aio_event = NULL;
3239 ut_a(n_segments > 0);
3255 array->handles =
ut_malloc(n *
sizeof(HANDLE));
3258 #if defined(LINUX_NATIVE_AIO)
3259 array->aio_ctx = NULL;
3260 array->aio_events = NULL;
3264 if (!srv_use_native_aio) {
3265 goto skip_native_aio;
3271 array->aio_ctx = (io_context**)
ut_malloc(n_segments *
3272 sizeof(*array->aio_ctx));
3273 for (i = 0; i < n_segments; ++i) {
3274 if (!os_aio_linux_create_io_ctx(n/n_segments,
3275 &array->aio_ctx[i])) {
3287 aio_event = (io_event*)
ut_malloc(n *
sizeof(io_event));
3288 memset(aio_event, 0x0,
sizeof(io_event) * n);
3289 array->aio_events = aio_event;
3293 for (i = 0; i < n; i++) {
3294 slot = os_aio_array_get_nth_slot(array, i);
3299 slot->handle = CreateEvent(NULL,TRUE, FALSE, NULL);
3301 over = &(slot->control);
3303 over->hEvent = slot->handle;
3305 *((array->handles) + i) = over->hEvent;
3307 #elif defined(LINUX_NATIVE_AIO)
3309 memset(&slot->control, 0x0,
sizeof(slot->control));
3329 for (i = 0; i < array->
n_slots; i++) {
3331 CloseHandle(slot->handle);
3342 #if defined(LINUX_NATIVE_AIO)
3343 if (srv_use_native_aio) {
3372 ulint n_segments = 2 + n_read_segs + n_write_segs;
3374 ut_ad(n_segments >= 4);
3378 for (i = 0; i < n_segments; i++) {
3385 os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);
3386 if (os_aio_ibuf_array == NULL) {
3390 srv_io_thread_function[0] =
"insert buffer thread";
3392 os_aio_log_array = os_aio_array_create(n_per_seg, 1);
3393 if (os_aio_log_array == NULL) {
3397 srv_io_thread_function[1] =
"log thread";
3399 os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg,
3401 if (os_aio_read_array == NULL) {
3405 for (i = 2; i < 2 + n_read_segs; i++) {
3406 ut_a(i < SRV_MAX_N_IO_THREADS);
3407 srv_io_thread_function[i] =
"read thread";
3410 os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg,
3412 if (os_aio_write_array == NULL) {
3416 for (i = 2 + n_read_segs; i < n_segments; i++) {
3417 ut_a(i < SRV_MAX_N_IO_THREADS);
3418 srv_io_thread_function[i] =
"write thread";
3421 os_aio_sync_array = os_aio_array_create(n_slots_sync, 1);
3422 if (os_aio_sync_array == NULL) {
3427 os_aio_n_segments = n_segments;
3431 os_aio_segment_wait_events =
static_cast<os_event_t *
>(
ut_malloc(n_segments *
sizeof(
void*)));
3433 for (i = 0; i < n_segments; i++) {
3437 os_last_printout = time(NULL);
3455 os_aio_array_free(os_aio_ibuf_array);
3456 os_aio_ibuf_array = NULL;
3457 os_aio_array_free(os_aio_log_array);
3458 os_aio_log_array = NULL;
3459 os_aio_array_free(os_aio_read_array);
3460 os_aio_read_array = NULL;
3461 os_aio_array_free(os_aio_write_array);
3462 os_aio_write_array = NULL;
3463 os_aio_array_free(os_aio_sync_array);
3464 os_aio_sync_array = NULL;
3466 for (i = 0; i < os_aio_n_segments; i++) {
3470 ut_free(os_aio_segment_wait_events);
3471 os_aio_segment_wait_events = 0;
3472 os_aio_n_segments = 0;
3481 os_aio_array_wake_win_aio_at_shutdown(
3487 for (i = 0; i < array->
n_slots; i++) {
3489 SetEvent((array->
slots + i)->handle);
3506 os_aio_array_wake_win_aio_at_shutdown(os_aio_read_array);
3507 os_aio_array_wake_win_aio_at_shutdown(os_aio_write_array);
3508 os_aio_array_wake_win_aio_at_shutdown(os_aio_ibuf_array);
3509 os_aio_array_wake_win_aio_at_shutdown(os_aio_log_array);
3511 #elif defined(LINUX_NATIVE_AIO)
3518 if (srv_use_native_aio) {
3526 for (i = 0; i < os_aio_n_segments; i++) {
3540 os_event_wait(os_aio_write_array->
is_empty);
3549 os_aio_get_segment_no_from_slot(
3557 if (array == os_aio_ibuf_array) {
3560 }
else if (array == os_aio_log_array) {
3563 }
else if (array == os_aio_read_array) {
3564 seg_len = os_aio_read_array->
n_slots
3567 segment = 2 + slot->
pos / seg_len;
3569 ut_a(array == os_aio_write_array);
3570 seg_len = os_aio_write_array->
n_slots
3574 + slot->
pos / seg_len;
3585 os_aio_get_array_and_local_segment(
3588 ulint global_segment)
3592 ut_a(global_segment < os_aio_n_segments);
3594 if (global_segment == 0) {
3595 *array = os_aio_ibuf_array;
3598 }
else if (global_segment == 1) {
3599 *array = os_aio_log_array;
3602 }
else if (global_segment < os_aio_read_array->n_segments + 2) {
3603 *array = os_aio_read_array;
3605 segment = global_segment - 2;
3607 *array = os_aio_write_array;
3609 segment = global_segment - (os_aio_read_array->
n_segments + 2);
3621 os_aio_array_reserve_slot(
3642 OVERLAPPED* control;
3644 #elif defined(LINUX_NATIVE_AIO)
3652 ulint slots_per_seg;
3656 ut_a((len & 0xFFFFFFFFUL) == len);
3665 local_seg = (offset >> (UNIV_PAGE_SIZE_SHIFT + 6))
3674 if (!srv_use_native_aio) {
3689 for (i = local_seg * slots_per_seg, counter = 0;
3690 counter < array->
n_slots; i++, counter++) {
3693 slot = os_aio_array_get_nth_slot(array, i);
3723 slot->
buf =
static_cast<unsigned char *
>(buf);
3729 control = &(slot->control);
3730 control->Offset = (DWORD)offset;
3731 control->OffsetHigh = (DWORD)offset_high;
3732 ResetEvent(slot->handle);
3734 #elif defined(LINUX_NATIVE_AIO)
3737 if (!srv_use_native_aio) {
3738 goto skip_native_aio;
3743 if (
sizeof(aio_offset) == 8) {
3744 aio_offset = offset_high;
3746 aio_offset += offset;
3748 ut_a(offset_high == 0);
3749 aio_offset = offset;
3752 iocb = &slot->control;
3755 io_prep_pread(iocb, file, buf, len, aio_offset);
3757 ut_a(type == OS_FILE_WRITE);
3758 io_prep_pwrite(iocb, file, buf, len, aio_offset);
3761 iocb->data = (
void*)slot;
3778 os_aio_array_free_slot(
3804 ResetEvent(slot->handle);
3806 #elif defined(LINUX_NATIVE_AIO)
3808 if (srv_use_native_aio) {
3809 memset(&slot->control, 0x0,
sizeof(slot->control));
3816 ut_ad(slot->n_bytes == 0);
3817 ut_ad(slot->ret == 0);
3828 os_aio_simulated_wake_handler_thread(
3830 ulint global_segment)
3839 ut_ad(!srv_use_native_aio);
3841 segment = os_aio_get_array_and_local_segment(&array, global_segment);
3849 for (i = 0; i < n; i++) {
3850 slot = os_aio_array_get_nth_slot(array, i + segment * n);
3862 os_event_set(os_aio_segment_wait_events[global_segment]);
3875 if (srv_use_native_aio) {
3881 os_aio_recommend_sleep_for_read_threads = FALSE;
3883 for (i = 0; i < os_aio_n_segments; i++) {
3884 os_aio_simulated_wake_handler_thread(i);
3907 if (srv_use_native_aio) {
3913 os_aio_recommend_sleep_for_read_threads = TRUE;
3915 for (g = 0; g < os_aio_n_segments; g++) {
3916 os_aio_get_array_and_local_segment(&array, g);
3918 if (array == os_aio_read_array) {
3926 #if defined(LINUX_NATIVE_AIO)
3932 os_aio_linux_dispatch(
3941 ut_ad(slot != NULL);
3950 iocb = &slot->control;
3953 ret = io_submit(array->aio_ctx[io_ctx_index], 1, &iocb);
3955 #if defined(UNIV_AIO_DEBUG)
3957 "io_submit[%c] ret[%d]: slot[%p] ctx[%p] seg[%lu]\n",
3958 (slot->
type == OS_FILE_WRITE) ?
'w' :
'r', ret, slot,
3959 array->aio_ctx[io_ctx_index], (ulong)io_ctx_index);
3964 if (UNIV_UNLIKELY(ret != 1)) {
4020 DWORD len = (DWORD) n;
4025 #if defined LINUX_NATIVE_AIO || defined WIN_ASYNC_IO
4037 ut_ad((n & 0xFFFFFFFFUL) == n);
4041 mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER);
4045 && !srv_use_native_aio
4056 return(os_file_read(file, buf, offset,
4060 ut_a(type == OS_FILE_WRITE);
4062 return(os_file_write(name, file, buf, offset, offset_high, n));
4065 #if defined LINUX_NATIVE_AIO || defined WIN_ASYNC_IO
4070 array = os_aio_read_array;
4072 array = os_aio_write_array;
4081 array = os_aio_ibuf_array;
4084 array = os_aio_log_array;
4086 array = os_aio_sync_array;
4088 #if defined(LINUX_NATIVE_AIO)
4090 ut_a(!srv_use_native_aio);
4097 slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
4098 name, buf, offset, offset_high, n);
4100 if (srv_use_native_aio) {
4102 os_bytes_read_since_printout += n;
4104 ret = ReadFile(file, buf, (DWORD)n, &len,
4107 #elif defined(LINUX_NATIVE_AIO)
4108 if (!os_aio_linux_dispatch(array, slot)) {
4114 os_aio_simulated_wake_handler_thread(
4115 os_aio_get_segment_no_from_slot(
4119 }
else if (type == OS_FILE_WRITE) {
4120 if (srv_use_native_aio) {
4123 ret = WriteFile(file, buf, (DWORD)n, &len,
4126 #elif defined(LINUX_NATIVE_AIO)
4127 if (!os_aio_linux_dispatch(array, slot)) {
4133 os_aio_simulated_wake_handler_thread(
4134 os_aio_get_segment_no_from_slot(
4143 if (srv_use_native_aio) {
4144 if ((ret && len == n)
4145 || (!ret && GetLastError() == ERROR_IO_PENDING)) {
4154 retval = os_aio_windows_handle(ULINT_UNDEFINED,
4172 #if defined LINUX_NATIVE_AIO || defined WIN_ASYNC_IO
4174 os_aio_array_free_slot(array, slot);
4176 retry = os_file_handle_error(name,
4178 ?
"aio read" :
"aio write");
4199 os_aio_windows_handle(
4219 ulint orig_seg = segment;
4229 if (segment == ULINT_UNDEFINED) {
4230 array = os_aio_sync_array;
4233 segment = os_aio_get_array_and_local_segment(&array, segment);
4240 ut_ad(segment < array->n_segments);
4244 if (array == os_aio_sync_array) {
4245 WaitForSingleObject(
4246 os_aio_array_get_nth_slot(array, pos)->
handle,
4251 i = WaitForMultipleObjects((DWORD) n,
4252 array->handles + segment * n,
4263 slot = os_aio_array_get_nth_slot(array, i + segment * n);
4267 if (orig_seg != ULINT_UNDEFINED) {
4269 "get windows aio return value");
4272 ret = GetOverlappedResult(slot->
file, &(slot->control), &len, TRUE);
4279 if (ret && len == slot->
len) {
4282 #ifdef UNIV_DO_FLUSH
4283 if (slot->
type == OS_FILE_WRITE
4284 && !os_do_not_call_flush_at_each_write) {
4285 if (!os_file_flush(slot->
file)) {
4290 }
else if (os_file_handle_error(slot->
name,
"Windows aio")) {
4308 struct PSI_file_locker* locker = NULL;
4309 register_pfs_file_io_begin(locker, slot->
file, slot->
len,
4310 (slot->
type == OS_FILE_WRITE)
4313 __FILE__, __LINE__);
4316 ut_a((slot->
len & 0xFFFFFFFFUL) == slot->
len);
4318 switch (slot->
type) {
4320 ret = WriteFile(slot->
file, slot->
buf,
4321 (DWORD) slot->
len, &len,
4326 ret = ReadFile(slot->
file, slot->
buf,
4327 (DWORD) slot->
len, &len,
4336 register_pfs_file_io_end(locker, len);
4339 if (!ret && GetLastError() == ERROR_IO_PENDING) {
4346 ret = GetOverlappedResult(slot->
file,
4351 ret_val = ret && len == slot->
len;
4354 os_aio_array_free_slot(array, slot);
4360 #if defined(LINUX_NATIVE_AIO)
4374 os_aio_linux_collect(
4384 struct timespec timeout;
4385 struct io_event* events;
4386 struct io_context* io_ctx;
4389 ut_ad(array != NULL);
4390 ut_ad(seg_size > 0);
4391 ut_ad(segment < array->n_segments);
4394 events = &array->aio_events[segment * seg_size];
4397 io_ctx = array->aio_ctx[segment];
4400 start_pos = segment * seg_size;
4403 end_pos = start_pos + seg_size;
4417 memset(events, 0,
sizeof(*events) * seg_size);
4419 timeout.tv_nsec = OS_AIO_REAP_TIMEOUT;
4421 ret = io_getevents(io_ctx, 1, seg_size, events, &timeout);
4428 if (ret == -EAGAIN) {
4436 if (ret == -EINTR) {
4446 if (UNIV_UNLIKELY(ret < 0)) {
4449 " InnoDB: unexpected ret_code[%d] from"
4450 " io_getevents()!\n", ret);
4456 for (i = 0; i < ret; i++) {
4458 struct iocb* control;
4460 control = (
struct iocb *)events[i].obj;
4461 ut_a(control != NULL);
4469 #if defined(UNIV_AIO_DEBUG)
4471 "io_getevents[%c]: slot[%p] ctx[%p]"
4473 (slot->
type == OS_FILE_WRITE) ?
'w' :
'r',
4474 slot, io_ctx, segment);
4486 slot->n_bytes = events[i].res;
4487 slot->ret = events[i].res2;
4505 os_aio_linux_handle(
4528 ut_a(global_seg != ULINT_UNDEFINED);
4531 segment = os_aio_get_array_and_local_segment(&array, global_seg);
4537 for (i = 0; i < n; ++i) {
4538 slot = os_aio_array_get_nth_slot(
4539 array, i + segment * n);
4553 "waiting for completed aio requests");
4554 os_aio_linux_collect(array, segment, n);
4563 "processing completed aio requests");
4568 ut_ad(slot != NULL);
4577 if ((slot->ret == 0) && (slot->n_bytes == (
long)slot->
len)) {
4580 #ifdef UNIV_DO_FLUSH
/* After a successfully completed write, flush the file unless per-write
flushing has been disabled. All three tests form one condition, so the
branch is entered only when the flush itself fails; this matches the
nested checks in the Windows completion handler. The closing parenthesis
previously ended the condition after the second test, which did not
compile when UNIV_DO_FLUSH was defined. */
4581 if (slot->type == OS_FILE_WRITE
4582 && !os_do_not_call_flush_at_each_write
4583 && !os_file_flush(slot->file)) {
4597 os_file_handle_error(slot->
name,
"Linux aio");
4604 os_aio_array_free_slot(array, slot);
4618 ulint global_segment,
4636 ulint n_consecutive;
4639 ulint lowest_offset;
4643 byte* combined_buf2;
4649 *consecutive_ios = NULL;
4651 memset(consecutive_ios, 0,
sizeof(
os_aio_slot_t*) * OS_AIO_MERGE_N_CONSECUTIVE);
4652 segment = os_aio_get_array_and_local_segment(&array, global_segment);
4659 "looking for i/o requests (a)");
4661 ut_ad(segment < array->n_segments);
4667 if (array == os_aio_read_array
4668 && os_aio_recommend_sleep_for_read_threads) {
4673 goto recommended_sleep;
4679 "looking for i/o requests (b)");
4684 for (i = 0; i < n; i++) {
4685 slot = os_aio_array_get_nth_slot(array, i + segment * n);
4691 "InnoDB: i/o for slot %lu"
4692 " already done, returning\n",
4709 lowest_offset = ULINT_MAX;
4711 for (i = 0; i < n; i++) {
4712 slot = os_aio_array_get_nth_slot(array, i + segment * n);
4715 age = (ulint)difftime(time(NULL),
4718 if ((age >= 2 && age > biggest_age)
4719 || (age >= 2 && age == biggest_age
4720 && slot->
offset < lowest_offset)) {
4723 consecutive_ios[0] = slot;
4728 lowest_offset = slot->
offset;
4733 if (n_consecutive == 0) {
4738 lowest_offset = ULINT_MAX;
4740 for (i = 0; i < n; i++) {
4741 slot = os_aio_array_get_nth_slot(array,
4747 consecutive_ios[0] = slot;
4751 lowest_offset = slot->
offset;
4756 if (n_consecutive == 0) {
4765 ut_ad(n_consecutive != 0);
4766 ut_ad(consecutive_ios[0] != NULL);
4768 slot = consecutive_ios[0];
4773 for (i = 0; i < n; i++) {
4774 slot2 = os_aio_array_get_nth_slot(array, i + segment * n);
4776 if (slot2->
reserved && slot2 != slot
4786 consecutive_ios[n_consecutive] = slot2;
4791 if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE) {
4793 goto consecutive_loop;
4807 slot = consecutive_ios[0];
4809 for (i = 0; i < n_consecutive; i++) {
4810 total_len += consecutive_ios[i]->
len;
4813 if (n_consecutive == 1) {
4815 combined_buf = slot->
buf;
4816 combined_buf2 = NULL;
4818 combined_buf2 =
static_cast<unsigned char *
>(
ut_malloc(total_len + UNIV_PAGE_SIZE));
4820 ut_a(combined_buf2);
4822 combined_buf =
static_cast<unsigned char *
>(
ut_align(combined_buf2, UNIV_PAGE_SIZE));
4831 if (slot->
type == OS_FILE_WRITE && n_consecutive > 1) {
4835 for (i = 0; i < n_consecutive; i++) {
4837 ut_memcpy(combined_buf + offs, consecutive_ios[i]->buf,
4838 consecutive_ios[i]->len);
4839 offs += consecutive_ios[i]->
len;
4847 "InnoDB: doing i/o of type %lu at offset %lu %lu,"
4850 (ulong) slot->
offset, (ulong) total_len);
4854 if (slot->
type == OS_FILE_WRITE) {
4855 ret = os_file_write(slot->
name, slot->
file, combined_buf,
4859 ret = os_file_read(slot->
file, combined_buf,
4868 "aio: %lu consecutive %lu:th segment, first offs %lu blocks\n",
/* The ulint arguments are cast to ulong so that they match the %lu
conversions, as the other diagnostic prints in this file do. */
4869 (ulong) n_consecutive, (ulong) global_segment,
(ulong) (slot->offset / UNIV_PAGE_SIZE));
4876 for (i = 0; i < n_consecutive; i++) {
4878 ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs,
4879 consecutive_ios[i]->len);
4880 offs += consecutive_ios[i]->
len;
4884 if (combined_buf2) {
4892 for (i = 0; i < n_consecutive; i++) {
4911 os_aio_array_free_slot(array, slot);
4928 os_event_wait(os_aio_segment_wait_events[global_segment]);
4932 "InnoDB: i/o handler thread for i/o"
4933 " segment %lu wakes up\n",
4934 (ulong) global_segment);
4945 os_aio_array_validate(
4950 ulint n_reserved = 0;
4960 for (i = 0; i < array->
n_slots; i++) {
4961 slot = os_aio_array_get_nth_slot(array, i);
4984 os_aio_array_validate(os_aio_read_array);
4985 os_aio_array_validate(os_aio_write_array);
4986 os_aio_array_validate(os_aio_ibuf_array);
4987 os_aio_array_validate(os_aio_log_array);
4988 os_aio_array_validate(os_aio_sync_array);
5000 os_aio_print_segment_info(
5016 fprintf(file,
" [");
5019 fprintf(file,
", ");
/* Print the reserved-slot count of segment i. Callers pass an array of
ulint, so the value is cast to ulong to match %lu, consistent with the
other prints in this file. */
5022 fprintf(file, "%lu", (ulong) n_seg[i]);
5024 fprintf(file,
"] ");
5038 ulint n_res_seg[SRV_MAX_N_IO_THREADS];
5039 time_t current_time;
5040 double time_elapsed;
5041 double avg_bytes_read;
5044 for (i = 0; i < srv_n_file_io_threads; i++) {
5045 fprintf(file,
"I/O thread %lu state: %s (%s)", (ulong) i,
5046 srv_io_thread_op_info[i],
5047 srv_io_thread_function[i]);
5050 if (os_aio_segment_wait_events[i]->is_set) {
5051 fprintf(file,
" ev set");
5055 fprintf(file,
"\n");
5058 fputs(
"Pending normal aio reads:", file);
5060 array = os_aio_read_array;
5071 memset(n_res_seg, 0x0,
sizeof(n_res_seg));
5073 for (i = 0; i < array->
n_slots; i++) {
5076 slot = os_aio_array_get_nth_slot(array, i);
5081 n_res_seg[seg_no]++;
5083 fprintf(stderr,
"Reserved slot, messages %p %p\n",
5093 fprintf(file,
" %lu", (ulong) n_reserved);
5095 os_aio_print_segment_info(file, n_res_seg, array);
5099 if (array == os_aio_read_array) {
5100 fputs(
", aio writes:", file);
5102 array = os_aio_write_array;
5107 if (array == os_aio_write_array) {
5108 fputs(
",\n ibuf aio reads:", file);
5109 array = os_aio_ibuf_array;
5114 if (array == os_aio_ibuf_array) {
5115 fputs(
", log i/o's:", file);
5116 array = os_aio_log_array;
5121 if (array == os_aio_log_array) {
5122 fputs(
", sync i/o's:", file);
5123 array = os_aio_sync_array;
5129 current_time = time(NULL);
5130 time_elapsed = 0.001 + difftime(current_time, os_last_printout);
5133 "Pending flushes (fsync) log: %lu; buffer pool: %lu\n"
5134 "%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n",
5135 (ulong) fil_n_pending_log_flushes,
5136 (ulong) fil_n_pending_tablespace_flushes,
5137 (ulong) os_n_file_reads, (ulong) os_n_file_writes,
5138 (ulong) os_n_fsyncs);
5142 "%lu pending preads, %lu pending pwrites\n",
5147 if (os_n_file_reads == os_n_file_reads_old) {
5148 avg_bytes_read = 0.0;
5150 avg_bytes_read = (double) os_bytes_read_since_printout
5151 / (os_n_file_reads - os_n_file_reads_old);
5155 "%.2f reads/s, %lu avg bytes/read,"
5156 " %.2f writes/s, %.2f fsyncs/s\n",
5157 (os_n_file_reads - os_n_file_reads_old)
5159 (ulong)avg_bytes_read,
5160 (os_n_file_writes - os_n_file_writes_old)
5162 (os_n_fsyncs - os_n_fsyncs_old)
5165 os_n_file_reads_old = os_n_file_reads;
5166 os_n_file_writes_old = os_n_file_writes;
5167 os_n_fsyncs_old = os_n_fsyncs;
5168 os_bytes_read_since_printout = 0;
5170 os_last_printout = current_time;
5180 os_n_file_reads_old = os_n_file_reads;
5181 os_n_file_writes_old = os_n_file_writes;
5182 os_n_fsyncs_old = os_n_fsyncs;
5183 os_bytes_read_since_printout = 0;
5185 os_last_printout = time(NULL);
5195 os_aio_all_slots_free(
void)
5201 array = os_aio_read_array;
5209 array = os_aio_write_array;
5217 array = os_aio_ibuf_array;
5225 array = os_aio_log_array;
5233 array = os_aio_sync_array;