Skip to content

Commit

Permalink
Fix segfault with regular expressions that matched the zero-length st…
Browse files Browse the repository at this point in the history
…ring.

In regular expressions, repetition operators`{0,0}` and `{,0}` are valid, but they always match the zero-length string. For instance, `a{0,0}` and `[a-z]{0,0}` both match the zero-length string.

When the whole regular expression consists in one of these repetitions it caused a segfault during the evaluation of the regular expression because the `forward_code_ref` field for the root atom in the atom's tree was null.

Closes #2084.
  • Loading branch information
plusvic committed Jun 19, 2024
1 parent 608fb3d commit 8616165
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 50 deletions.
16 changes: 0 additions & 16 deletions libyara/atoms.c
Original file line number Diff line number Diff line change
Expand Up @@ -1355,22 +1355,6 @@ int yr_atoms_extract_from_re(
*atoms = _yr_atoms_list_concat(*atoms, case_insensitive_atoms);
}

// No atoms has been extracted, let's add a zero-length atom.

if (*atoms == NULL)
{
*atoms = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM));

if (*atoms == NULL)
return ERROR_INSUFFICIENT_MEMORY;

(*atoms)->atom.length = 0;
(*atoms)->backtrack = 0;
(*atoms)->forward_code_ref = re_ast->root_node->forward_code_ref;
(*atoms)->backward_code_ref = YR_ARENA_NULL_REF;
(*atoms)->next = NULL;
}

return ERROR_SUCCESS;
}

Expand Down
105 changes: 71 additions & 34 deletions libyara/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -502,19 +502,22 @@ static int _yr_parser_write_string(
literal_string->length + 1, // +1 to include terminating NULL
&ref);

if (result != ERROR_SUCCESS)
goto cleanup;

string->length = (uint32_t) literal_string->length;
string->string = (uint8_t*) yr_arena_ref_to_ptr(compiler->arena, &ref);

if (result == ERROR_SUCCESS)
{
result = yr_atoms_extract_from_string(
&compiler->atoms_config,
(uint8_t*) literal_string->c_string,
(int32_t) literal_string->length,
modifier,
&atom_list,
min_atom_quality);
}
result = yr_atoms_extract_from_string(
&compiler->atoms_config,
(uint8_t*) literal_string->c_string,
(int32_t) literal_string->length,
modifier,
&atom_list,
min_atom_quality);

if (result != ERROR_SUCCESS)
goto cleanup;
}
else
{
Expand All @@ -524,37 +527,72 @@ static int _yr_parser_write_string(
// variable-length portions.
modifier.flags &= ~STRING_FLAGS_FIXED_OFFSET;

// Save the position where the RE forward code starts for later reference.
yr_arena_off_t forward_code_start = yr_arena_get_current_offset(
compiler->arena, YR_RE_CODE_SECTION);

// Emit forwards code
result = yr_re_ast_emit_code(re_ast, compiler->arena, false);

if (result != ERROR_SUCCESS)
goto cleanup;

// Emit backwards code
if (result == ERROR_SUCCESS)
result = yr_re_ast_emit_code(re_ast, compiler->arena, true);
result = yr_re_ast_emit_code(re_ast, compiler->arena, true);

if (result == ERROR_SUCCESS)
result = yr_atoms_extract_from_re(
&compiler->atoms_config,
re_ast,
modifier,
&atom_list,
min_atom_quality);
if (result != ERROR_SUCCESS)
goto cleanup;

// Extract atoms from the regular expression.
result = yr_atoms_extract_from_re(
&compiler->atoms_config,
re_ast,
modifier,
&atom_list,
min_atom_quality);

if (result != ERROR_SUCCESS)
goto cleanup;

// If no atom was extracted let's add a zero-length atom.
if (atom_list == NULL)
{
atom_list = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM));

if (atom_list == NULL)
{
result = ERROR_INSUFFICIENT_MEMORY;
goto cleanup;
}

atom_list->atom.length = 0;
atom_list->backtrack = 0;
atom_list->backward_code_ref = YR_ARENA_NULL_REF;
atom_list->next = NULL;

yr_arena_ptr_to_ref(
compiler->arena,
yr_arena_get_ptr(
compiler->arena, YR_RE_CODE_SECTION, forward_code_start),
&(atom_list->forward_code_ref));
}
}

string->flags = modifier.flags;
string->rule_idx = compiler->current_rule_idx;
string->idx = compiler->current_string_idx;
string->fixed_offset = YR_UNDEFINED;

if (result == ERROR_SUCCESS)
{
// Add the string to Aho-Corasick automaton.
result = yr_ac_add_string(
compiler->automaton,
string,
compiler->current_string_idx,
atom_list,
compiler->arena);
}
// Add the string to Aho-Corasick automaton.
result = yr_ac_add_string(
compiler->automaton,
string,
compiler->current_string_idx,
atom_list,
compiler->arena);

if (result != ERROR_SUCCESS)
goto cleanup;

if (modifier.flags & STRING_FLAGS_LITERAL)
{
Expand All @@ -580,6 +618,7 @@ static int _yr_parser_write_string(

compiler->current_string_idx++;

cleanup:
if (free_literal)
yr_free(literal_string);

Expand Down Expand Up @@ -761,11 +800,9 @@ int yr_parser_reduce_string_declaration(
{
if (result == ERROR_UNKNOWN_ESCAPE_SEQUENCE)
{
yywarning(
yyscanner,
"unknown escape sequence");
yywarning(yyscanner, "unknown escape sequence");
}
else
else
{
snprintf(
message,
Expand Down Expand Up @@ -1148,7 +1185,7 @@ int yr_parser_reduce_string_identifier(
YR_STRING* string;
YR_COMPILER* compiler = yyget_extra(yyscanner);

if (strcmp(identifier, "$") == 0) // is an anonymous string ?
if (strcmp(identifier, "$") == 0) // is an anonymous string ?
{
if (compiler->loop_for_of_var_index >= 0) // inside a loop ?
{
Expand Down
2 changes: 2 additions & 0 deletions tests/test-rules.c
Original file line number Diff line number Diff line change
Expand Up @@ -2517,6 +2517,8 @@ void test_re()
assert_true_regexp("a{0,1}?bc", "abc", "abc");
assert_true_regexp("a{0,1}bc", "bbc", "bc");
assert_true_regexp("a{0,1}?bc", "abc", "bc");
assert_true_regexp("a{,0}", "a", "");
assert_true_regexp("a{,0}", "x", "");
assert_true_regexp("aa{0,1}?bc", "abc", "abc");
assert_true_regexp("aa{0,1}?bc", "abc", "abc");
assert_true_regexp("aa{0,1}bc", "abc", "abc");
Expand Down

0 comments on commit 8616165

Please sign in to comment.