Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

speed up CSS class queries #2137

Merged
merged 10 commits into from
Dec 18, 2020
3 changes: 1 addition & 2 deletions ext/java/nokogiri/XmlXpathContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,7 @@ public IRubyObject register_variable(IRubyObject name, IRubyObject value) {
}

private IRubyObject node_set(ThreadContext context, String expr, IRubyObject handler) {
final NokogiriXPathFunctionResolver fnResolver =
handler.isNil() ? null : NokogiriXPathFunctionResolver.create(handler);
final NokogiriXPathFunctionResolver fnResolver = NokogiriXPathFunctionResolver.create(handler);
try {
return tryGetNodeSet(context, expr, fnResolver);
}
Expand Down
11 changes: 9 additions & 2 deletions ext/java/nokogiri/internals/NokogiriNamespaceContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,15 @@
*/
public final class NokogiriNamespaceContext implements NamespaceContext {

public static final String NOKOGIRI_PREFIX = "nokogiri";
/*
* these constants have matching declarations in
* ext/nokogiri/xml_xpath_context.c
*/
public static final String NOKOGIRI_PREFIX = "nokogiri";
public static final String NOKOGIRI_URI = "http://www.nokogiri.org/default_ns/ruby/extensions_functions";
public static final String NOKOGIRI_TEMPORARY_ROOT_TAG = "nokogiri-temporary-root-tag";

public static final String NOKOGIRI_BUILTIN_PREFIX = "nokogiri-builtin";
public static final String NOKOGIRI_BUILTIN_URI = "https://www.nokogiri.org/default_ns/ruby/builtins";

private final Map<String,String> register;

Expand All @@ -63,6 +69,7 @@ public static NokogiriNamespaceContext create() {
/**
 * Creates a namespace context pre-populated with the default prefix-to-URI
 * bindings: the Nokogiri custom-function namespace, the Nokogiri builtin
 * function namespace, and the "xml" and "xhtml" prefixes.
 */
private NokogiriNamespaceContext() {
// initial capacity 6 with load factor 1
register = new HashMap<String, String>(6, 1);
register.put(NOKOGIRI_PREFIX, NOKOGIRI_URI);
register.put(NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);
register.put("xml", "http://www.w3.org/XML/1998/namespace");
register.put("xhtml", "http://www.w3.org/1999/xhtml");
}
Expand Down
68 changes: 62 additions & 6 deletions ext/java/nokogiri/internals/NokogiriXPathFunction.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

import javax.xml.xpath.XPathFunction;
import javax.xml.xpath.XPathFunctionException;
import javax.xml.namespace.QName;

import org.jruby.Ruby;
import org.jruby.RubyArray;
Expand Down Expand Up @@ -64,14 +65,14 @@
public class NokogiriXPathFunction implements XPathFunction {

private final IRubyObject handler;
private final String name;
private final QName name;
private final int arity;

public static NokogiriXPathFunction create(IRubyObject handler, String name, int arity) {
public static NokogiriXPathFunction create(IRubyObject handler, QName name, int arity) {
return new NokogiriXPathFunction(handler, name, arity);
}

private NokogiriXPathFunction(IRubyObject handler, String name, int arity) {
private NokogiriXPathFunction(IRubyObject handler, QName name, int arity) {
this.handler = handler;
this.name = name;
this.arity = arity;
Expand All @@ -82,11 +83,20 @@ public Object evaluate(List args) throws XPathFunctionException {
throw new XPathFunctionException("arity does not match");
}

final Ruby runtime = this.handler.getRuntime();
ThreadContext context = runtime.getCurrentContext();
if (name.getNamespaceURI().equals(NokogiriNamespaceContext.NOKOGIRI_BUILTIN_URI)) {
if (name.getLocalPart().equals("css-class")) {
return builtinCssClass(args);
}
}

IRubyObject result = Helpers.invoke(context, this.handler, this.name, fromObjectToRubyArgs(runtime, args));
if (this.handler.isNil()) {
throw new XPathFunctionException("no custom function handler declared for '" + name + "'");
}

final Ruby runtime = this.handler.getRuntime();
ThreadContext context = runtime.getCurrentContext();
IRubyObject result = Helpers.invoke(context, this.handler, this.name.getLocalPart(),
fromObjectToRubyArgs(runtime, args));
return fromRubyToObject(runtime, result);
}

Expand Down Expand Up @@ -121,4 +131,50 @@ private static Object fromRubyToObject(final Ruby runtime, IRubyObject obj) {
}
/*if (o instanceof XmlNode)*/ return ((XmlNode) obj).getNode();
}

private static boolean builtinCssClass(List args) throws XPathFunctionException {
flavorjones marked this conversation as resolved.
Show resolved Hide resolved
flavorjones marked this conversation as resolved.
Show resolved Hide resolved
if (args.size() != 2) {
throw new XPathFunctionException("builtin function nokogiri:css-class takes two arguments");
}

String hay = args.get(0).toString();
String needle = args.get(1).toString();

if (needle.length() == 0) {
return true;
}

int j = 0;
int j_lim = hay.length() - needle.length();
while (j <= j_lim) {
int k;
for (k = 0; k < needle.length(); k++) {
if (needle.charAt(k) != hay.charAt(j+k)) {
break;
}
}
if (k == needle.length()) {
if ((hay.length() == (j+k)) || isWhitespace(hay.charAt(j+k))) {
return true ;
}
}

/* advance str to whitespace */
while (j <= j_lim && !isWhitespace(hay.charAt(j))) {
j++;
}

/* advance str to start of next word or end of string */
while (j <= j_lim && isWhitespace(hay.charAt(j))) {
j++;
}
}

return false;
}

/**
 * Tells whether {@code subject} is one of the four blank characters
 * tab (0x09), line feed (0x0A), carriage return (0x0D) or space (0x20).
 * See libxml2's xmlIsBlank_ch().
 *
 * @param subject the character to classify
 * @return {@code true} for the four characters listed above, else {@code false}
 */
private static boolean isWhitespace(char subject) {
  switch (subject) {
    case 0x09: // tab
    case 0x0A: // line feed
    case 0x0D: // carriage return
    case 0x20: // space
      return true;
    default:
      return false;
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ public final class NokogiriXPathFunctionResolver implements XPathFunctionResolve

public static NokogiriXPathFunctionResolver create(IRubyObject handler) {
NokogiriXPathFunctionResolver freshResolver = new NokogiriXPathFunctionResolver();
freshResolver.setHandler(handler);
if (!handler.isNil()) {
freshResolver.setHandler(handler);
}
return freshResolver;
}

Expand All @@ -65,6 +67,6 @@ public void setHandler(IRubyObject handler) {
}

public XPathFunction resolveFunction(QName name, int arity) {
return NokogiriXPathFunction.create(handler, name.getLocalPart(), arity);
return NokogiriXPathFunction.create(handler, name, arity);
}
}
84 changes: 81 additions & 3 deletions ext/nokogiri/xml_xpath_context.c
Original file line number Diff line number Diff line change
@@ -1,12 +1,86 @@
#include <xml_xpath_context.h>

/*
 * XPath namespace prefix and URI for Nokogiri's builtin XPath functions.
 *
 * these constants have matching declarations in
 * ext/java/nokogiri/internals/NokogiriNamespaceContext.java
 */
static const xmlChar *NOKOGIRI_BUILTIN_PREFIX = (const xmlChar *)"nokogiri-builtin";
static const xmlChar *NOKOGIRI_BUILTIN_URI = (const xmlChar *)"https://www.nokogiri.org/default_ns/ruby/builtins";

/* release the libxml2 XPath context `ctx` via xmlXPathFreeContext() */
static void deallocate(xmlXPathContextPtr ctx)
{
NOKOGIRI_DEBUG_START(ctx);
xmlXPathFreeContext(ctx);
NOKOGIRI_DEBUG_END(ctx);
}

/*
 * Look for the CSS class name `val` in `str`, the value of an HTML element's
 * `class` attribute.
 *
 * `str` is walked one blank-separated word at a time. `val` matches when it
 * occurs at the start of a word and is immediately followed by a blank
 * character or the end of the string.
 *
 * Returns a pointer to the start of the match inside `str`, `str` itself when
 * `val` is the empty string, or NULL when either argument is NULL or no match
 * is found.
 */
const xmlChar* builtin_css_class(const xmlChar* str, const xmlChar *val)
{
  const xmlChar *cursor;
  int val_len;

  if ((str == NULL) || (val == NULL)) {
    return(NULL);
  }

  val_len = xmlStrlen(val);
  if (val_len == 0) {
    return(str);
  }

  cursor = str;
  while (*cursor != 0) {
    /* cheap first-byte test before the full comparison */
    if ((*cursor == *val) && (xmlStrncmp(cursor, val, val_len) == 0)) {
      xmlChar following = cursor[val_len];

      /* only match if the next byte is whitespace or end of string */
      if ((following == 0) || (IS_BLANK_CH(following))) {
        return(cursor);
      }
    }

    /* skip the rest of the current word */
    for (; (*cursor != 0) && !IS_BLANK_CH(*cursor); cursor++) {
    }

    /* skip the blanks in front of the next word, if any */
    for (; (*cursor != 0) && IS_BLANK_CH(*cursor); cursor++) {
    }
  }

  return(NULL);
}

/*
 * xmlXPathFunction wrapper around builtin_css_class().
 *
 * Expects exactly two arguments on the XPath value stack. The value popped
 * first is used as the class name to look for, the one popped second as the
 * string to search. Pushes an XPath boolean: true when builtin_css_class()
 * finds a match, false otherwise. Raises XPATH_INVALID_TYPE when either
 * popped value is missing or is not a string.
 */
static void xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
{
  xmlXPathObjectPtr class_list;
  xmlXPathObjectPtr class_name;
  int found;

  CHECK_ARITY(2);

  CAST_TO_STRING;
  class_name = valuePop(ctxt);
  if ((class_name == NULL) || (class_name->type != XPATH_STRING)) {
    xmlXPathFreeObject(class_name);
    XP_ERROR(XPATH_INVALID_TYPE);
  }

  CAST_TO_STRING;
  class_list = valuePop(ctxt);
  if ((class_list == NULL) || (class_list->type != XPATH_STRING)) {
    /* both popped objects must be released before bailing out */
    xmlXPathFreeObject(class_list);
    xmlXPathFreeObject(class_name);
    XP_ERROR(XPATH_INVALID_TYPE);
  }

  found = (builtin_css_class(class_list->stringval, class_name->stringval) != NULL);
  valuePush(ctxt, xmlXPathNewBoolean(found));

  xmlXPathFreeObject(class_list);
  xmlXPathFreeObject(class_name);
}

/*
* call-seq:
* register_ns(prefix, uri)
Expand Down Expand Up @@ -261,14 +335,18 @@ static VALUE new(VALUE klass, VALUE nodeobj)
xmlXPathContextPtr ctx;
VALUE self;

xmlXPathInit();

Data_Get_Struct(nodeobj, xmlNode, node);

xmlXPathInit();

ctx = xmlXPathNewContext(node->doc);
ctx->node = node;

xmlXPathRegisterNs(ctx, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);
xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI,
xpath_builtin_css_class);

self = Data_Wrap_Struct(klass, 0, deallocate, ctx);
/*rb_iv_set(self, "@xpath_handler", Qnil); */
return self;
}

Expand Down
6 changes: 3 additions & 3 deletions lib/nokogiri/css/parser.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# frozen_string_literal: true
#
# DO NOT MODIFY!!!!
# This file is automatically generated by Racc 1.4.16
# This file is automatically generated by Racc 1.5.1
# from Racc grammar file "".
#

Expand Down Expand Up @@ -476,7 +476,7 @@ def _reduce_26(val, _values, result)
end

def _reduce_27(val, _values, result)
# Non standard, but hpricot supports it.
# non-standard, from hpricot
result = Node.new(:PSEUDO_CLASS,
[Node.new(:FUNCTION, ['nth-child(', val[1]])]
)
Expand Down Expand Up @@ -558,7 +558,7 @@ def _reduce_40(val, _values, result)
when 'n'
result = Node.new(:NTH, ['1','n','+','0'])
else
# This is not CSS standard. It allows us to support this:
# non-standard to support custom functions:
# assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
# assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
# assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
Expand Down
4 changes: 2 additions & 2 deletions lib/nokogiri/css/parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ rule
)
}
| LSQUARE NUMBER RSQUARE {
# Non standard, but hpricot supports it.
# non-standard, from hpricot
result = Node.new(:PSEUDO_CLASS,
[Node.new(:FUNCTION, ['nth-child(', val[1]])]
)
Expand Down Expand Up @@ -139,7 +139,7 @@ rule
when 'n'
result = Node.new(:NTH, ['1','n','+','0'])
else
# This is not CSS standard. It allows us to support this:
# non-standard to support custom functions:
# assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
# assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
# assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
Expand Down
Loading