Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix some compiler edge cases. #1709

Merged
merged 3 commits into from
May 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions docs/writingrules.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1200,6 +1200,18 @@ The keywords ``any``, ``all`` and ``none`` can be used as well.
1 of ($*) // same as "any of them"
none of ($b*) // zero of the set of strings that start with "$b"

.. warning:: Due to the way YARA works internally, using "0 of them" is an
ambiguous part of the language which should be avoided in favor of "none
of them". To understand this, consider the meaning of "2 of them", which
is true if 2 or more of the strings match. Historically, "0 of them"
followed this principle and would evaluate to true if at least 0 of the strings
matched, which is always the case. This ambiguity is resolved in YARA 4.3.0 by making "0 of
them" evaluate to true if exactly 0 of the strings match. To improve on
the situation and make the intent clear, it is encouraged to use "none" in
place of 0. By not using an integer it is easier to reason about the meaning
of "none of them" without the historical understanding of "at least 0"
clouding the issue.


Starting with YARA 4.2.0 it is possible to express a set of strings in an
integer range, like this:
Expand Down
7 changes: 7 additions & 0 deletions libyara/compiler.c
Original file line number Diff line number Diff line change
Expand Up @@ -1041,6 +1041,13 @@ YR_API char* yr_compiler_get_error_message(
"rule identifier \"%s\" matches previously used wildcard rule set",
compiler->last_error_extra_info);
break;
case ERROR_INVALID_VALUE:
snprintf(
buffer,
buffer_size,
"invalid value in condition: \"%s\"",
compiler->last_error_extra_info);
break;
}

return buffer;
Expand Down
1,258 changes: 682 additions & 576 deletions libyara/grammar.c

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion libyara/grammar.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ extern int yara_yydebug;
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
union YYSTYPE
{
#line 336 "grammar.y"
#line 341 "grammar.y"

YR_EXPRESSION expression;
SIZED_STRING* sized_string;
Expand Down
114 changes: 103 additions & 11 deletions libyara/grammar.y
Original file line number Diff line number Diff line change
Expand Up @@ -286,19 +286,24 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

%type <integer> integer_set
%type <integer> integer_enumeration
%type <integer> for_expression
%type <integer> rule_modifier
%type <integer> rule_modifiers
%type <integer> string_enumeration
%type <integer> string_enumeration_item
%type <integer> string_set
%type <integer> for_iteration
%type <integer> rule_enumeration
%type <integer> rule_enumeration_item
%type <integer> rule_set

%type <expression> primary_expression
%type <expression> boolean_expression
%type <expression> expression
%type <expression> identifier
%type <expression> regexp
%type <expression> for_expression
%type <expression> for_quantifier


%type <c_string> arguments
%type <c_string> arguments_list
Expand Down Expand Up @@ -1687,12 +1692,22 @@ expression
}
| for_expression _OF_ string_set
{
if ($1.type == EXPRESSION_TYPE_INTEGER && $1.value.integer > $3)
{
yywarning(yyscanner,
"expression always false - requesting %lld of %lld.", $1.value.integer, $3);
}
yr_parser_emit_with_arg(yyscanner, OP_OF, OF_STRING_SET, NULL, NULL);

$$.type = EXPRESSION_TYPE_BOOLEAN;
}
| for_expression _OF_ rule_set
{
if ($1.type == EXPRESSION_TYPE_INTEGER && $1.value.integer > $3)
{
yywarning(yyscanner,
"expression always false - requesting %lld of %lld.", $1.value.integer, $3);
}
yr_parser_emit_with_arg(yyscanner, OP_OF, OF_RULE_SET, NULL, NULL);

$$.type = EXPRESSION_TYPE_BOOLEAN;
Expand Down Expand Up @@ -1737,6 +1752,12 @@ expression
}
| for_expression _OF_ string_set _IN_ range
{
if ($1.type == EXPRESSION_TYPE_INTEGER && $1.value.integer > $3)
{
yywarning(yyscanner,
"expression always false - requesting %lld of %lld.", $1.value.integer, $3);
}

yr_parser_emit(yyscanner, OP_OF_FOUND_IN, NULL);

$$.type = EXPRESSION_TYPE_BOOLEAN;
Expand Down Expand Up @@ -2106,6 +2127,18 @@ range
result = ERROR_WRONG_TYPE;
}

// If we can statically determine lower and upper bounds, ensure
// lower < upper. Check for upper bound here because some things (like
// string count) are EXPRESSION_TYPE_INTEGER.
if ($2.value.integer != YR_UNDEFINED &&
$4.value.integer != YR_UNDEFINED &&
$2.value.integer > $4.value.integer)
{
yr_compiler_set_error_extra_info(
compiler, "range lower bound must be greater than upper bound");
result = ERROR_INVALID_VALUE;
}

fail_if_error(result);
}
;
Expand Down Expand Up @@ -2214,12 +2247,15 @@ rule_set
yr_parser_emit_push_const(yyscanner, YR_UNDEFINED);
}
rule_enumeration ')'
{
$$ = $3;
}
;


rule_enumeration
: rule_enumeration_item
| rule_enumeration ',' rule_enumeration_item
: rule_enumeration_item { $$ = $1; }
| rule_enumeration ',' rule_enumeration_item { $$ = $1 + $3; }
;


Expand Down Expand Up @@ -2254,9 +2290,12 @@ rule_enumeration_item
yr_free($1);

fail_if_error(result);

$$ = 1;
}
| _IDENTIFIER_ '*'
{
int count = 0;
YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
compiler->arena,
YR_NAMESPACES_TABLE,
Expand All @@ -2268,33 +2307,86 @@ rule_enumeration_item
ns->name,
1);

int result = yr_parser_emit_pushes_for_rules(yyscanner, $1);
int result = yr_parser_emit_pushes_for_rules(yyscanner, $1, &count);
yr_free($1);

fail_if_error(result);

$$ = count;
}
;


for_expression
: primary_expression
{
$$ = FOR_EXPRESSION_ANY;
if ($1.type == EXPRESSION_TYPE_INTEGER)
{
if ($1.value.integer == 0)
{
yywarning(yyscanner,
"consider using \"none\" keyword, it is less ambiguous.");
}

if ($1.value.integer < 0)
{
yr_compiler_set_error_extra_info_fmt(compiler,
"%lld", $1.value.integer);
fail_with_error(ERROR_INVALID_VALUE);
}
}

if ($1.type == EXPRESSION_TYPE_STRING)
{
SIZED_STRING* ss = yr_arena_ref_to_ptr(compiler->arena,
&$1.value.sized_string_ref);
// If the expression is an external string variable we need to get
// it some other way.
if (ss != NULL)
{
yr_compiler_set_error_extra_info_fmt(compiler, "%s", ss->c_string);
}
else
{
yr_compiler_set_error_extra_info(compiler,
"string in for_expression is invalid");
}
fail_with_error(ERROR_INVALID_VALUE);
}

if ($1.type == EXPRESSION_TYPE_REGEXP)
{
yr_compiler_set_error_extra_info(compiler,
"regexp in for_expression is invalid");
fail_with_error(ERROR_INVALID_VALUE);
}

$$.value.integer = $1.value.integer;
}
| _ALL_
| for_quantifier
{
yr_parser_emit_push_const(yyscanner, YR_UNDEFINED);
$$ = FOR_EXPRESSION_ALL;
$$.value.integer = $1.value.integer;
}
;

for_quantifier
: _ALL_
{
yr_parser_emit_push_const(yyscanner, YR_UNDEFINED);
$$.type = EXPRESSION_TYPE_QUANTIFIER;
$$.value.integer = FOR_EXPRESSION_ALL;
}
| _ANY_
{
yr_parser_emit_push_const(yyscanner, 1);
$$ = FOR_EXPRESSION_ANY;
$$.type = EXPRESSION_TYPE_QUANTIFIER;
$$.value.integer = FOR_EXPRESSION_ANY;
}
| _NONE_
{
yr_parser_emit_push_const(yyscanner, 0);
$$ = FOR_EXPRESSION_NONE;
$$.type = EXPRESSION_TYPE_QUANTIFIER;
$$.value.integer = FOR_EXPRESSION_NONE;
}
;

Expand Down Expand Up @@ -2468,7 +2560,7 @@ primary_expression
{
case OBJECT_TYPE_INTEGER:
$$.type = EXPRESSION_TYPE_INTEGER;
$$.value.integer = YR_UNDEFINED;
$$.value.integer = $1.value.object->value.i;
break;
case OBJECT_TYPE_FLOAT:
$$.type = EXPRESSION_TYPE_FLOAT;
Expand Down
15 changes: 8 additions & 7 deletions libyara/include/yara/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,14 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define YARA_ERROR_LEVEL_WARNING 1

// Expression type constants are powers of two because they are used as flags.
#define EXPRESSION_TYPE_UNKNOWN 0
#define EXPRESSION_TYPE_BOOLEAN 1
#define EXPRESSION_TYPE_INTEGER 2
#define EXPRESSION_TYPE_STRING 4
#define EXPRESSION_TYPE_REGEXP 8
#define EXPRESSION_TYPE_OBJECT 16
#define EXPRESSION_TYPE_FLOAT 32
#define EXPRESSION_TYPE_UNKNOWN 0
#define EXPRESSION_TYPE_BOOLEAN 1
#define EXPRESSION_TYPE_INTEGER 2
#define EXPRESSION_TYPE_STRING 4
#define EXPRESSION_TYPE_REGEXP 8
#define EXPRESSION_TYPE_OBJECT 16
#define EXPRESSION_TYPE_FLOAT 32
#define EXPRESSION_TYPE_QUANTIFIER 64

// The compiler uses an arena to store the data it generates during the
// compilation. Each buffer in the arena is used for storing a different type
Expand Down
1 change: 1 addition & 0 deletions libyara/include/yara/error.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ERROR_BLOCK_NOT_READY 61
#define ERROR_INVALID_PERCENTAGE 62
#define ERROR_IDENTIFIER_MATCHES_WILDCARD 63
#define ERROR_INVALID_VALUE 64

#define GOTO_EXIT_ON_ERROR(x) \
{ \
Expand Down
3 changes: 2 additions & 1 deletion libyara/include/yara/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,8 @@ int yr_parser_emit_pushes_for_strings(

int yr_parser_emit_pushes_for_rules(
yyscan_t yyscanner,
const char* identifier);
const char* identifier,
int *count);

int yr_parser_reduce_external(
yyscan_t yyscanner,
Expand Down
10 changes: 9 additions & 1 deletion libyara/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,10 @@ int yr_parser_emit_pushes_for_strings(

// Emit OP_PUSH_RULE instructions for all rules whose identifier has given
// prefix.
int yr_parser_emit_pushes_for_rules(yyscan_t yyscanner, const char* prefix)
int yr_parser_emit_pushes_for_rules(
yyscan_t yyscanner,
const char* prefix,
int* count)
{
YR_COMPILER* compiler = yyget_extra(yyscanner);

Expand Down Expand Up @@ -284,6 +287,11 @@ int yr_parser_emit_pushes_for_rules(yyscan_t yyscanner, const char* prefix)
rule++;
}

if (count != NULL)
{
*count = matching;
}

if (matching == 0)
{
yr_compiler_set_error_extra_info(compiler, prefix);
Expand Down
Loading