Skip to content

Commit

Permalink
[CBRD-22793] C++ regex gives a different result and error message on …
Browse files Browse the repository at this point in the history
…Windows (#2186)

http://jira.cubrid.org/browse/CBRD-22793

Throws an error for the collating element syntax, which does not work well in <regex>, and makes the error messages consistent between Linux and Windows by deriving them from the error code. Added a new header and source file for regex-related functions.
  • Loading branch information
hgryoo authored Jan 31, 2020
1 parent 781302c commit 3371cd3
Show file tree
Hide file tree
Showing 8 changed files with 204 additions and 50 deletions.
2 changes: 2 additions & 0 deletions cs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -227,10 +227,12 @@ set(QUERY_SOURCES
${QUERY_DIR}/query_method.c
${QUERY_DIR}/regu_var.cpp
${QUERY_DIR}/string_opfunc.c
${QUERY_DIR}/string_regex.cpp
${QUERY_DIR}/xasl_to_stream.c
)
set(QUERY_HEADERS
${QUERY_DIR}/query_monitoring.hpp
${QUERY_DIR}/string_regex.hpp
)

set(OBJECT_SOURCES
Expand Down
2 changes: 2 additions & 0 deletions cubrid/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ set(QUERY_SOURCES
${QUERY_DIR}/show_scan.c
${QUERY_DIR}/stream_to_xasl.c
${QUERY_DIR}/string_opfunc.c
${QUERY_DIR}/string_regex.cpp
${QUERY_DIR}/vacuum.c
${QUERY_DIR}/xasl_cache.c
)
Expand All @@ -232,6 +233,7 @@ set(QUERY_HEADERS
${QUERY_DIR}/query_monitoring.hpp
${QUERY_DIR}/query_reevaluation.hpp
${QUERY_DIR}/scan_json_table.hpp
${QUERY_DIR}/string_regex.hpp
)

set(OBJECT_SOURCES
Expand Down
2 changes: 2 additions & 0 deletions sa/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ set(QUERY_SOURCES
${QUERY_DIR}/show_scan.c
${QUERY_DIR}/stream_to_xasl.c
${QUERY_DIR}/string_opfunc.c
${QUERY_DIR}/string_regex.cpp
${QUERY_DIR}/vacuum.c
${QUERY_DIR}/xasl_cache.c
${QUERY_DIR}/xasl_to_stream.c
Expand All @@ -268,6 +269,7 @@ set(QUERY_HEADERS
${QUERY_DIR}/query_monitoring.hpp
${QUERY_DIR}/query_reevaluation.hpp
${QUERY_DIR}/scan_json_table.hpp
${QUERY_DIR}/string_regex.hpp
)

set(OBJECT_SOURCES
Expand Down
54 changes: 12 additions & 42 deletions src/query/string_opfunc.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,9 @@
#include "elo.h"
#include "es_common.h"
#include "db_elo.h"
#include "string_regex.hpp"

#include <algorithm>
#include <regex>
#include <string>
#if !defined (SERVER_MODE)
#include "parse_tree.h"
Expand Down Expand Up @@ -93,8 +94,6 @@
#define LOB_CHUNK_SIZE (128 * 1024)
#define DB_GET_UCHAR(dbval) (REINTERPRET_CAST (const unsigned char *, db_get_string ((dbval))))

#define REGEX_MAX_ERROR_MSG_SIZE 100

/*
* This enumeration type is used to categorize the different
* string types into function like groups.
Expand Down Expand Up @@ -4300,30 +4299,6 @@ db_string_like (const DB_VALUE * src_string, const DB_VALUE * pattern, const DB_
return ((*result == V_ERROR) ? ER_QSTR_INVALID_ESCAPE_SEQUENCE : error_status);
}

/*
 * regex_compile () - compile a pattern into a heap-allocated std::regex
 *
 * return                 : NO_ERROR on success, ER_REGEX_COMPILE_ERROR when std::regex_error is thrown
 * pattern (in)           : pattern text handed to the std::regex constructor
 * rx_compiled_regex (out): receives the new regex on success; deleted and reset to NULL on failure
 * reg_flags (in)         : syntax options handed to the std::regex constructor
 */
static int
regex_compile (const char *pattern, std::regex * &rx_compiled_regex,
	       std::regex_constants::syntax_option_type & reg_flags)
{
  // *INDENT-OFF*
  try
    {
      rx_compiled_regex = new std::regex (pattern, reg_flags);
      return NO_ERROR;
    }
  catch (std::regex_error & e)
    {
      // regex compilation exception: record it through the error manager and drop the regex pointer
      er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_REGEX_COMPILE_ERROR, 1, e.what ());
      delete rx_compiled_regex;
      rx_compiled_regex = NULL;
    }
  // *INDENT-ON*

  return ER_REGEX_COMPILE_ERROR;
}

/*
* db_string_rlike () - check for match between string and regex
*
Expand All @@ -4350,10 +4325,9 @@ regex_compile (const char *pattern, std::regex * &rx_compiled_regex,
* An illegal pattern is specified.
*
*/

int
db_string_rlike (const DB_VALUE * src_string, const DB_VALUE * pattern, const DB_VALUE * case_sensitive,
std::regex ** comp_regex, char **comp_pattern, int *result)
cub_regex_object ** comp_regex, char **comp_pattern, int *result)
{
QSTR_CATEGORY src_category = QSTR_UNKNOWN;
QSTR_CATEGORY pattern_category = QSTR_UNKNOWN;
Expand All @@ -4366,12 +4340,8 @@ db_string_rlike (const DB_VALUE * src_string, const DB_VALUE * pattern, const DB
bool is_case_sensitive = false;
int src_length = 0, pattern_length = 0;

char rx_err_buf[REGEX_MAX_ERROR_MSG_SIZE] = { '\0' };
char *rx_compiled_pattern = NULL;

// *INDENT-OFF*
std::regex *rx_compiled_regex = NULL;
// *INDENT-ON*
cub_regex_object *rx_compiled_regex = NULL;

/* check for allocated DB values */
assert (src_string != NULL);
Expand Down Expand Up @@ -4472,39 +4442,39 @@ db_string_rlike (const DB_VALUE * src_string, const DB_VALUE * pattern, const DB
memcpy (rx_compiled_pattern, pattern_char_string_p, pattern_length);
rx_compiled_pattern[pattern_length] = '\0';

// *INDENT-OFF*
// *INDENT-OFF*
std::regex_constants::syntax_option_type reg_flags = std::regex_constants::ECMAScript;
reg_flags |= std::regex_constants::nosubs;
if (!is_case_sensitive)
{
reg_flags |= std::regex_constants::icase;
}
// *INDENT-ON*

error_status = regex_compile (rx_compiled_pattern, rx_compiled_regex, reg_flags);
error_status = cubregex::compile_regex <char, cubregex::cub_reg_traits> (rx_compiled_pattern, rx_compiled_regex, reg_flags);
if (error_status != NO_ERROR)
{
ASSERT_ERROR ();
*result = V_ERROR;
goto cleanup;
}
// *INDENT-ON*
}

// *INDENT-OFF*
// *INDENT-OFF*
try
{
std::string src (src_char_string_p, src_length);
bool match = std::regex_search (src, *rx_compiled_regex);
*result = match ? V_TRUE : V_FALSE;
}
catch (std::regex_error & e)
catch (std::regex_error &e)
{
// regex execution exception, error_complexity or error_stack
error_status = ER_REGEX_EXEC_ERROR;
er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, error_status, 1, e.what ());
std::string error_message = cubregex::parse_regex_exception (e);
er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, error_status, 1, error_message.c_str ());
*result = V_ERROR;
}
// *INDENT-ON*
// *INDENT-ON*

cleanup:

Expand Down
7 changes: 2 additions & 5 deletions src/query/string_opfunc.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,7 @@
#include "numeric_opfunc.h"
#include "object_domain.h"
#include "thread_compat.hpp"

#ifdef __cplusplus
#include <regex>
#endif
#include "string_regex.hpp"

#define QSTR_IS_CHAR(s) (((s)==DB_TYPE_CHAR) || \
((s)==DB_TYPE_VARCHAR))
Expand Down Expand Up @@ -222,7 +219,7 @@ extern int db_string_like (const DB_VALUE * src_string, const DB_VALUE * pattern

#ifdef __cplusplus
extern int db_string_rlike (const DB_VALUE * src_string, const DB_VALUE * pattern, const DB_VALUE * case_sensitive,
std::regex ** comp_regex, char **comp_pattern, int *result);
cub_regex_object ** comp_regex, char **comp_pattern, int *result);
#endif

extern int db_string_limit_size_string (DB_VALUE * src_string, DB_VALUE * result, const int new_size, int *spare_bytes);
Expand Down
127 changes: 127 additions & 0 deletions src/query/string_regex.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/*
* Copyright (C) 2008 Search Solution Corporation. All rights reserved by Search Solution.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/

//
// string_regex - definitions and functions related to regular expression
//

#include "string_regex.hpp"

#include "error_manager.h"
#include <string>

namespace cubregex
{
/*
 * parse_regex_exception () - translate the error code of a regex_error into a fixed message
 *
 * return : message text selected by e.code (); "regex_error(error_unknown)" when the code is not listed
 * e (in) : exception whose error code is looked up
 */
std::string
parse_regex_exception (std::regex_error &e)
{
  namespace rc = std::regex_constants;

  struct code_text
  {
    rc::error_type code;
    const char *text;
  };

  // one entry per std::regex_constants error code handled here
  static const code_text known_errors[] =
  {
    { rc::error_collate, "regex_error(error_collate): the expression contains an invalid collating element name" },
    { rc::error_ctype, "regex_error(error_ctype): the expression contains an invalid character class name" },
    { rc::error_escape, "regex_error(error_escape): the expression contains an invalid escaped character or a trailing escape" },
    { rc::error_backref, "regex_error(error_backref): the expression contains an invalid back reference" },
    { rc::error_brack, "regex_error(error_brack): the expression contains mismatched square brackets ('[' and ']')" },
    { rc::error_paren, "regex_error(error_paren): the expression contains mismatched parentheses ('(' and ')')" },
    { rc::error_brace, "regex_error(error_brace): the expression contains mismatched curly braces ('{' and '}')" },
    { rc::error_badbrace, "regex_error(error_badbrace): the expression contains an invalid range in a {} expression" },
    { rc::error_range, "regex_error(error_range): the expression contains an invalid character range (e.g. [b-a])" },
    { rc::error_space, "regex_error(error_space): there was not enough memory to convert the expression into a finite state machine" },
    { rc::error_badrepeat, "regex_error(error_badrepeat): one of *?+{ was not preceded by a valid regular expression" },
    { rc::error_complexity, "regex_error(error_complexity): the complexity of an attempted match exceeded a predefined level" },
    { rc::error_stack, "regex_error(error_stack): there was not enough memory to perform a match" }
  };

  const rc::error_type raised = e.code ();
  for (const code_text &entry : known_errors)
    {
      if (entry.code == raised)
	{
	  return entry.text;
	}
    }

  return "regex_error(error_unknown)";
}

/*
 * compile_regex () - compile a pattern into a heap-allocated std::basic_regex
 *
 * return                 : NO_ERROR on success, ER_REGEX_COMPILE_ERROR when std::regex_error is thrown
 * pattern (in)           : null-terminated pattern text
 * rx_compiled_regex (out): receives the new regex on success; deleted and reset to NULL on failure
 * reg_flags (in)         : syntax options handed to the std::basic_regex constructor
 *
 * On failure the message produced by parse_regex_exception () is recorded with er_set ().
 */
template< class CharT, class Reg_Traits >
int compile_regex (const CharT *pattern, std::basic_regex<CharT, Reg_Traits> *&rx_compiled_regex,
		   std::regex_constants::syntax_option_type &reg_flags)
{
  int error_status = NO_ERROR;
  std::basic_string <CharT> pattern_str (pattern);
  try
    {
#if defined(WINDOWS)
      /* HACK: collating element support does not work well on Windows, and lookup_collatename is not
       * invoked when the pattern contains a collating element. As a workaround, search the pattern for
       * the opening "[[." sequence and raise the same error explicitly.
       */
      // built from CharT values so the search has the same character type as pattern_str
      const CharT collate_elem_pattern[] = { CharT ('['), CharT ('['), CharT ('.'), CharT (0) };
      if (pattern_str.find (collate_elem_pattern) != std::basic_string <CharT>::npos)
	{
	  throw std::regex_error (std::regex_constants::error_collate);
	}
#endif
      rx_compiled_regex = new std::basic_regex<CharT, Reg_Traits> (pattern_str, reg_flags);
    }
  catch (std::regex_error &e)
    {
      // regex compilation exception
      error_status = ER_REGEX_COMPILE_ERROR;
      std::string error_message = parse_regex_exception (e);
      er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, error_status, 1, error_message.c_str ());

      // the assignment above did not complete, so this releases whatever the caller passed in;
      // delete on a null pointer is a no-op, so no explicit NULL check is needed
      delete rx_compiled_regex;
      rx_compiled_regex = NULL;
    }

  return error_status;
}

/* Explicit instantiation of compile_regex for char patterns with cub_reg_traits. The template body is
 * defined in this source file, so this is the instantiation emitted from here. */
template int compile_regex <char> (const char *pattern, std::basic_regex<char, cub_reg_traits> *&rx_compiled_regex,
std::regex_constants::syntax_option_type &reg_flags);

}
55 changes: 55 additions & 0 deletions src/query/string_regex.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright (C) 2008 Search Solution Corporation. All rights reserved by Search Solution.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/

//
// string_regex - definitions and functions related to regular expression
//

#ifndef _STRING_REGEX_HPP_
#define _STRING_REGEX_HPP_

#ifdef __cplusplus
#include <regex>

#include "error_manager.h"

namespace cubregex
{
/*
 * cub_reg_traits - regex traits that reject the collating element syntax
 *
 * Derives from std::regex_traits<char> and hides lookup_collatename () with a version that always throws
 * std::regex_error (error_collate). The collatename syntax ([[. .]]) gives an inconsistent result, so it is
 * reported as an error whenever this lookup is invoked.
 */
struct cub_reg_traits : public std::regex_traits<char>
{
  // the iterator range is ignored: every collating element lookup is rejected
  template <typename ForwardIt>
  string_type lookup_collatename (ForwardIt, ForwardIt) const
  {
    throw std::regex_error (std::regex_constants::error_collate);
  }
};

/*
 * parse_regex_exception () - build an error message from the error code of a regex_error
 *
 * regex_error::what () gives different messages depending on the compiler, so the message is selected
 * explicitly from the error code of the exception instead.
 *
 * return : error message text
 * e (in) : exception whose error code is translated
 */
std::string parse_regex_exception (std::regex_error &e);

/*
 * compile_regex () - compile a pattern into a newly allocated std::basic_regex
 *
 * return                 : error code (NO_ERROR on success)
 * pattern (in)           : pattern text to compile
 * rx_compiled_regex (out): pointer that receives the compiled regex
 * reg_flags (in)         : syntax options used for the compilation
 */
template< class CharT, class Reg_Traits >
int compile_regex (const CharT *pattern, std::basic_regex<CharT, Reg_Traits> *&rx_compiled_regex,
std::regex_constants::syntax_option_type &reg_flags);
}

/* regex object type: std::basic_regex over char that uses cubregex::cub_reg_traits as its traits class */
using cub_regex_object = std::basic_regex <char, cubregex::cub_reg_traits>;
#endif

#endif // _STRING_REGEX_HPP_
5 changes: 2 additions & 3 deletions src/xasl/xasl_predicate.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@
#define _XASL_PREDICATE_HPP_

#include "dbtype_def.h" // DB_TYPE

#include <regex>
#include "string_regex.hpp"

// forward definitions
class regu_variable_node;
Expand Down Expand Up @@ -134,7 +133,7 @@ namespace cubxasl
regu_variable_node *src;
regu_variable_node *pattern;
regu_variable_node *case_sensitive;
mutable std::regex *compiled_regex;
mutable cub_regex_object *compiled_regex;
mutable char *compiled_pattern;
};

Expand Down

0 comments on commit 3371cd3

Please sign in to comment.