Drizzled Public API Documentation

logging_query.cc
1 /* -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
3  *
4  * Copyright (C) 2008, 2009 Sun Microsystems, Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; version 2 of the License.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18  */
19 
20 #include <config.h>
21 #include <drizzled/plugin.h>
22 #include <drizzled/plugin/logging.h>
23 #include <drizzled/gettext.h>
24 #include <drizzled/session.h>
25 #include <drizzled/session/times.h>
26 #include <drizzled/sql_parse.h>
27 #include PCRE_HEADER
28 #include <limits.h>
29 #include <sys/time.h>
30 #include <sys/types.h>
31 #include <sys/stat.h>
32 #include <fcntl.h>
33 #include <string>
34 #include <boost/format.hpp>
35 #include <boost/program_options.hpp>
37 #include <cstdio>
38 #include <cerrno>
39 
40 namespace po= boost::program_options;
41 using namespace drizzled;
42 using namespace std;
43 
44 #define ESCAPE_CHAR '\\'
45 #define SEPARATOR_CHAR ','
46 
47 namespace drizzle_plugin
48 {
49 
50 static bool sysvar_logging_query_enable= false;
51 /* TODO fix these to not be unsigned long once we have sensible sys_var system */
52 static uint32_constraint sysvar_logging_query_threshold_slow;
53 static uint32_constraint sysvar_logging_query_threshold_big_resultset;
54 static uint32_constraint sysvar_logging_query_threshold_big_examined;
55 
56 /* quote a string to be safe to include in a CSV line
57  that means backslash quoting all commas, doublequotes, backslashes,
58  and all the ASCII unprintable characters
59  as long as we pass the high-bit bytes unchanged
60  this is safe to do to a UTF8 string
61  we dont allow overrunning the targetbuffer
62  to avoid having a very long query overwrite memory
63 
64  TODO consider remapping the unprintables instead to "Printable
65  Representation", the Unicode characters from the area U+2400 to
66  U+2421 reserved for representing control characters when it is
67  necessary to print or display them rather than have them perform
68  their intended function.
69 
70 */
71 
72 static void quotify(const string &src, string &dst)
73 {
74  static const char hexit[]= { '0', '1', '2', '3', '4', '5', '6', '7',
75  '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
76  string::const_iterator src_iter;
77 
78  for (src_iter= src.begin(); src_iter < src.end(); ++src_iter)
79  {
80  if (static_cast<unsigned char>(*src_iter) > 0x7f)
81  {
82  dst.push_back(*src_iter);
83  }
84  else if (*src_iter == 0x00) // null
85  {
86  dst.push_back(ESCAPE_CHAR); dst.push_back('0');
87  }
88  else if (*src_iter == 0x07) // bell
89  {
90  dst.push_back(ESCAPE_CHAR); dst.push_back('a');
91  }
92  else if (*src_iter == 0x08) // backspace
93  {
94  dst.push_back(ESCAPE_CHAR); dst.push_back('b');
95  }
96  else if (*src_iter == 0x09) // horiz tab
97  {
98  dst.push_back(ESCAPE_CHAR); dst.push_back('t');
99  }
100  else if (*src_iter == 0x0a) // line feed
101  {
102  dst.push_back(ESCAPE_CHAR); dst.push_back('n');
103  }
104  else if (*src_iter == 0x0b) // vert tab
105  {
106  dst.push_back(ESCAPE_CHAR); dst.push_back('v');
107  }
108  else if (*src_iter == 0x0c) // formfeed
109  {
110  dst.push_back(ESCAPE_CHAR); dst.push_back('f');
111  }
112  else if (*src_iter == 0x0d) // carrage return
113  {
114  dst.push_back(ESCAPE_CHAR); dst.push_back('r');
115  }
116  else if (*src_iter == 0x1b) // escape
117  {
118  dst.push_back(ESCAPE_CHAR); dst.push_back('e');
119  }
120  else if (*src_iter == 0x22) // quotation mark
121  {
122  dst.push_back(ESCAPE_CHAR); dst.push_back(0x22);
123  }
124  else if (*src_iter == SEPARATOR_CHAR)
125  {
126  dst.push_back(ESCAPE_CHAR); dst.push_back(SEPARATOR_CHAR);
127  }
128  else if (*src_iter == ESCAPE_CHAR)
129  {
130  dst.push_back(ESCAPE_CHAR); dst.push_back(ESCAPE_CHAR);
131  }
132  else if ((*src_iter < 0x20) || (*src_iter == 0x7F)) // other unprintable ASCII
133  {
134  dst.push_back(ESCAPE_CHAR);
135  dst.push_back('x');
136  dst.push_back(hexit[(*src_iter >> 4) & 0x0f]);
137  dst.push_back(hexit[*src_iter & 0x0f]);
138  }
139  else // everything else
140  {
141  dst.push_back(*src_iter);
142  }
143  }
144 }
145 
146 
148 {
149  const std::string _filename;
150  const std::string _query_pcre;
151  int fd;
152  pcre *re;
153  pcre_extra *pe;
154 
156  boost::format formatter;
157 
158 public:
159 
160  Logging_query(const std::string &filename,
161  const std::string &query_pcre) :
162  drizzled::plugin::Logging("csv_query_log"),
163  _filename(filename),
164  _query_pcre(query_pcre),
165  fd(-1), re(NULL), pe(NULL),
166  formatter("%1%,%2%,%3%,\"%4%\",\"%5%\",\"%6%\",%7%,%8%,"
167  "%9%,%10%,%11%,%12%,%13%,%14%,\"%15%\"\n")
168  {
169 
170  /* if there is no destination filename, dont bother doing anything */
171  if (_filename.empty())
172  return;
173 
174  fd= open(_filename.c_str(),
175  O_WRONLY | O_APPEND | O_CREAT,
176  S_IRUSR|S_IWUSR);
177 
178  if (fd < 0)
179  {
180  sql_perror( _("fail open()"), _filename);
181  return;
182  }
183 
184  if (not _query_pcre.empty())
185  {
186  const char *this_pcre_error;
187  int this_pcre_erroffset;
188  re= pcre_compile(_query_pcre.c_str(), 0, &this_pcre_error,
189  &this_pcre_erroffset, NULL);
190  pe= pcre_study(re, 0, &this_pcre_error);
191  /* TODO emit error messages if there is a problem */
192  }
193  }
194 
195  ~Logging_query()
196  {
197  if (fd >= 0)
198  {
199  close(fd);
200  }
201 
202  if (pe != NULL)
203  {
204  pcre_free(pe);
205  }
206 
207  if (re != NULL)
208  {
209  pcre_free(re);
210  }
211  }
212 
213  virtual bool post (Session *session)
214  {
215  size_t wrv;
216 
217  assert(session != NULL);
218 
219  if (fd < 0)
220  return false;
221 
222  /* Yes, we know that checking sysvar_logging_query_enable,
223  sysvar_logging_query_threshold_big_resultset, and
224  sysvar_logging_query_threshold_big_examined is not threadsafe,
225  because some other thread might change these sysvars. But we
226  don't care. We might start logging a little late as it spreads
227  to other threads. Big deal. */
228 
229  // return if not enabled or query was too fast or resultset was too small
230  if (sysvar_logging_query_enable == false)
231  return false;
232  if (session->sent_row_count < sysvar_logging_query_threshold_big_resultset.get())
233  return false;
234  if (session->examined_row_count < sysvar_logging_query_threshold_big_examined.get())
235  return false;
236 
237  /*
238  TODO, the session object should have a "utime command completed"
239  inside itself, so be more accurate, and so this doesnt have to
240  keep calling current_utime, which can be slow.
241  */
242  uint64_t t_mark= session->times.getCurrentTimestamp(false);
243 
244  if (session->times.getElapsedTime() < sysvar_logging_query_threshold_slow.get())
245  return false;
246 
247  Session::QueryString query_string(session->getQueryString());
248  if (query_string == NULL)
249  {
250  return false;
251  }
252 
253  if (re)
254  {
255  int this_pcre_rc;
256  this_pcre_rc= pcre_exec(re, pe, query_string->c_str(), query_string->length(), 0, 0, NULL, 0);
257  if (this_pcre_rc < 0)
258  return false;
259  }
260 
261  // buffer to quotify the query
262  string qs;
263 
264  // Since quotify() builds the quoted string incrementally, we can
265  // avoid some reallocating if we reserve some space up front.
266  qs.reserve(query_string->length());
267 
268  quotify(*query_string, qs);
269 
270  // to avoid trying to printf %s something that is potentially NULL
271  util::string::ptr schema(session->schema());
272  formatter % t_mark
273  % session->thread_id
274  % session->getQueryId()
275  % (schema ? schema->c_str() : "")
276  % qs
277  % getCommandName(session->command)
278  % (t_mark - session->times.getConnectMicroseconds())
279  % session->times.getElapsedTime()
280  % (t_mark - session->times.utime_after_lock)
281  % session->sent_row_count
282  % session->examined_row_count
283  % session->tmp_table
284  % session->total_warn_count
285  % session->getServerId()
286  % getServerHostname();
287 
288  string msgbuf= formatter.str();
289 
290  // a single write has a kernel thread lock, thus no need mutex guard this
291  wrv= write(fd, msgbuf.c_str(), msgbuf.length());
292  assert(wrv == msgbuf.length());
293 
294  return false;
295  }
296 };
297 
298 static int logging_query_plugin_init(drizzled::module::Context &context)
299 {
300  const module::option_map &vm= context.getOptions();
301 
302  if (vm.count("filename"))
303  {
304  context.add(new Logging_query(vm["filename"].as<string>(),
305  vm["pcre"].as<string>()));
306  context.registerVariable(new sys_var_bool_ptr("enable", &sysvar_logging_query_enable));
307  context.registerVariable(new sys_var_const_string_val("filename", vm["filename"].as<string>()));
308  context.registerVariable(new sys_var_const_string_val("pcre", vm["pcre"].as<string>()));
309  context.registerVariable(new sys_var_constrained_value<uint32_t>("threshold_slow", sysvar_logging_query_threshold_slow));
310  context.registerVariable(new sys_var_constrained_value<uint32_t>("threshold_big_resultset", sysvar_logging_query_threshold_big_resultset));
311  context.registerVariable(new sys_var_constrained_value<uint32_t>("threshold_big_examined", sysvar_logging_query_threshold_big_examined));
312  }
313 
314  return 0;
315 }
316 
317 static void init_options(drizzled::module::option_context &context)
318 {
319  context("enable",
320  po::value<bool>(&sysvar_logging_query_enable)->default_value(false)->zero_tokens(),
321  _("Enable logging to CSV file"));
322  context("filename",
323  po::value<string>(),
324  _("File to log to"));
325  context("pcre",
326  po::value<string>()->default_value(""),
327  _("PCRE to match the query against"));
328  context("threshold-slow",
329  po::value<uint32_constraint>(&sysvar_logging_query_threshold_slow)->default_value(0),
330  _("Threshold for logging slow queries, in microseconds"));
331  context("threshold-big-resultset",
332  po::value<uint32_constraint>(&sysvar_logging_query_threshold_big_resultset)->default_value(0),
333  _("Threshold for logging big queries, for rows returned"));
334  context("threshold-big-examined",
335  po::value<uint32_constraint>(&sysvar_logging_query_threshold_big_examined)->default_value(0),
336  _("Threshold for logging big queries, for rows examined"));
337 }
338 
339 } /* namespace drizzle_plugin */
340 
341 DRIZZLE_DECLARE_PLUGIN
342 {
343  DRIZZLE_VERSION_ID,
344  "logging_query",
345  "0.2",
346  "Mark Atwood",
347  N_("Logs queries to a CSV file"),
348  PLUGIN_LICENSE_GPL,
349  drizzle_plugin::logging_query_plugin_init,
350  NULL,
351  drizzle_plugin::init_options
352 }
353 DRIZZLE_DECLARE_PLUGIN_END;