Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
SUSE:SLE-12:Update
xerces-j2.2419
xerces-regularexpression-backport.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File xerces-regularexpression-backport.patch of Package xerces-j2.2419
diff --git a/src/org/apache/xerces/impl/xpath/regex/RegularExpression.java b/src/org/apache/xerces/impl/xpath/regex/RegularExpression.java index 4a0cbbb..d2f068c 100644 --- a/src/org/apache/xerces/impl/xpath/regex/RegularExpression.java +++ b/src/org/apache/xerces/impl/xpath/regex/RegularExpression.java @@ -1,9 +1,10 @@ /* - * Copyright 1999-2002,2004,2005 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * @@ -17,6 +18,10 @@ package org.apache.xerces.impl.xpath.regex; import java.text.CharacterIterator; +import java.util.Locale; +import java.util.Stack; + +import org.apache.xerces.util.IntStack; /** * A regular expression matching engine using Non-deterministic Finite Automaton (NFA). @@ -572,10 +577,7 @@ public class RegularExpression implements java.io.Serializable { if (tok.type == Token.NONGREEDYCLOSURE) { op = Op.createNonGreedyClosure(); } else { // Token.CLOSURE - if (child.getMinLength() == 0) - op = Op.createClosure(this.numberOfClosures++); - else - op = Op.createClosure(-1); + op = Op.createClosure(this.numberOfClosures++); } op.next = next; op.setChild(compile(child, op, reverse)); @@ -698,7 +700,7 @@ public class RegularExpression implements java.io.Serializable { * @param match A Match instance for storing matching result. * @return Offset of the start position in <VAR>target</VAR>; or -1 if not match. */ - public boolean matches(char[] target, int start, int end, Match match) { + public boolean matches(char[] target, int start, int end, Match match) { synchronized (this) { if (this.operations == null) @@ -723,14 +725,14 @@ public class RegularExpression implements java.io.Serializable { con.match = match; if (RegularExpression.isSet(this.options, XMLSCHEMA_MODE)) { - int matchEnd = this. matchCharArray (con, this.operations, con.start, 1, this.options); + int matchEnd = this. match(con, this.operations, con.start, 1, this.options); //System.err.println("DEBUG: matchEnd="+matchEnd); if (matchEnd == con.limit) { if (con.match != null) { con.match.setBeginning(0, con.start); con.match.setEnd(0, matchEnd); } - con.inuse = false; + con.setInUse(false); return true; } return false; @@ -748,10 +750,10 @@ public class RegularExpression implements java.io.Serializable { con.match.setBeginning(0, o); con.match.setEnd(0, o+this.fixedString.length()); } - con.inuse = false; + con.setInUse(false); return true; } - con.inuse = false; + con.setInUse(false); return false; } @@ -764,7 +766,7 @@ public class RegularExpression implements java.io.Serializable { int o = this.fixedStringTable.matches(target, con.start, con.limit); if (o < 0) { //System.err.println("Non-match in fixed-string search."); - con.inuse = false; + con.setInUse(false); return false; } } @@ -780,7 +782,7 @@ public class RegularExpression implements java.io.Serializable { && this.operations.type == Op.CLOSURE && this.operations.getChild().type == Op.DOT) { if (isSet(this.options, SINGLE_LINE)) { matchStart = con.start; - matchEnd = this. matchCharArray (con, this.operations, con.start, 1, this.options); + matchEnd = this. match(con, this.operations, con.start, 1, this.options); } else { boolean previousIsEOL = true; for (matchStart = con.start; matchStart <= limit; matchStart ++) { @@ -789,8 +791,8 @@ public class RegularExpression implements java.io.Serializable { previousIsEOL = true; } else { if (previousIsEOL) { - if (0 <= (matchEnd = this. matchCharArray (con, this.operations, - matchStart, 1, this.options))) + if (0 <= (matchEnd = this. match(con, this.operations, + matchStart, 1, this.options))) break; } previousIsEOL = false; @@ -805,33 +807,16 @@ public class RegularExpression implements java.io.Serializable { else if (this.firstChar != null) { //System.err.println("DEBUG: with firstchar-matching: "+this.firstChar); RangeToken range = this.firstChar; - if (RegularExpression.isSet(this.options, IGNORE_CASE)) { - range = this.firstChar.getCaseInsensitiveToken(); - for (matchStart = con.start; matchStart <= limit; matchStart ++) { - int ch = target [ matchStart ] ; - if (REUtil.isHighSurrogate(ch) && matchStart+1 < con.limit) { - ch = REUtil.composeFromSurrogates(ch, target [ matchStart+1 ] ); - if (!range.match(ch)) continue; - } else { - if (!range.match(ch)) { - char ch1 = Character.toUpperCase((char)ch); - if (!range.match(ch1)) - if (!range.match(Character.toLowerCase(ch1))) - continue; - } - } - if (0 <= (matchEnd = this. matchCharArray (con, this.operations, - matchStart, 1, this.options))) - break; + for (matchStart = con.start; matchStart <= limit; matchStart ++) { + int ch = target [matchStart] ; + if (REUtil.isHighSurrogate(ch) && matchStart+1 < con.limit) { + ch = REUtil.composeFromSurrogates(ch, target[matchStart+1]); } - } else { - for (matchStart = con.start; matchStart <= limit; matchStart ++) { - int ch = target [ matchStart ] ; - if (REUtil.isHighSurrogate(ch) && matchStart+1 < con.limit) - ch = REUtil.composeFromSurrogates(ch, target [ matchStart+1 ] ); - if (!range.match(ch)) continue; - if (0 <= (matchEnd = this. matchCharArray (con, this.operations, - matchStart, 1, this.options))) + if (!range.match(ch)) { + continue; + } + if (0 <= (matchEnd = this. match(con, this.operations, + matchStart, 1, this.options))) { break; } } @@ -842,7 +827,7 @@ public class RegularExpression implements java.io.Serializable { */ else { for (matchStart = con.start; matchStart <= limit; matchStart ++) { - if (0 <= (matchEnd = this. matchCharArray (con, this.operations, matchStart, 1, this.options))) + if (0 <= (matchEnd = this. match(con, this.operations, matchStart, 1, this.options))) break; } } @@ -852,521 +837,14 @@ public class RegularExpression implements java.io.Serializable { con.match.setBeginning(0, matchStart); con.match.setEnd(0, matchEnd); } - con.inuse = false; + con.setInUse(false); return true; } else { - con.inuse = false; - return false; - } - } - -/** - * @return -1 when not match; offset of the end of matched string when match. - */ - private int matchCharArray (Context con, Op op, int offset, int dx, int opts) { - - char[] target = con.charTarget; - - - while (true) { - if (op == null) - return isSet(opts, XMLSCHEMA_MODE) && offset != con.limit ? -1 : offset; - if (offset > con.limit || offset < con.start) - return -1; - switch (op.type) { - case Op.CHAR: - if (isSet(opts, IGNORE_CASE)) { - int ch = op.getData(); - if (dx > 0) { - if (offset >= con.limit || !matchIgnoreCase(ch, target [ offset ] )) - return -1; - offset ++; - } else { - int o1 = offset-1; - if (o1 >= con.limit || o1 < 0 || !matchIgnoreCase(ch, target [ o1 ] )) - return -1; - offset = o1; - } - } else { - int ch = op.getData(); - if (dx > 0) { - if (offset >= con.limit || ch != target [ offset ] ) - return -1; - offset ++; - } else { - int o1 = offset-1; - if (o1 >= con.limit || o1 < 0 || ch != target [ o1 ] ) - return -1; - offset = o1; - } - } - op = op.next; - break; - - case Op.DOT: - if (dx > 0) { - if (offset >= con.limit) - return -1; - int ch = target [ offset ] ; - if (isSet(opts, SINGLE_LINE)) { - if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) - offset ++; - } else { - if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) - ch = REUtil.composeFromSurrogates(ch, target [ ++offset ] ); - if (isEOLChar(ch)) - return -1; - } - offset ++; - } else { - int o1 = offset-1; - if (o1 >= con.limit || o1 < 0) - return -1; - int ch = target [ o1 ] ; - if (isSet(opts, SINGLE_LINE)) { - if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) - o1 --; - } else { - if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) - ch = REUtil.composeFromSurrogates( target [ --o1 ] , ch); - if (!isEOLChar(ch)) - return -1; - } - offset = o1; - } - op = op.next; - break; - - case Op.RANGE: - case Op.NRANGE: - if (dx > 0) { - if (offset >= con.limit) - return -1; - int ch = target [ offset ] ; - if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) - ch = REUtil.composeFromSurrogates(ch, target [ ++offset ] ); - RangeToken tok = op.getToken(); - if (isSet(opts, IGNORE_CASE)) { - tok = tok.getCaseInsensitiveToken(); - if (!tok.match(ch)) { - if (ch >= 0x10000) return -1; - char uch; - if (!tok.match(uch = Character.toUpperCase((char)ch)) - && !tok.match(Character.toLowerCase(uch))) - return -1; - } - } else { - if (!tok.match(ch)) return -1; - } - offset ++; - } else { - int o1 = offset-1; - if (o1 >= con.limit || o1 < 0) - return -1; - int ch = target [ o1 ] ; - if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) - ch = REUtil.composeFromSurrogates( target [ --o1 ] , ch); - RangeToken tok = op.getToken(); - if (isSet(opts, IGNORE_CASE)) { - tok = tok.getCaseInsensitiveToken(); - if (!tok.match(ch)) { - if (ch >= 0x10000) return -1; - char uch; - if (!tok.match(uch = Character.toUpperCase((char)ch)) - && !tok.match(Character.toLowerCase(uch))) - return -1; - } - } else { - if (!tok.match(ch)) return -1; - } - offset = o1; - } - op = op.next; - break; - - case Op.ANCHOR: - boolean go = false; - switch (op.getData()) { - case '^': - if (isSet(opts, MULTIPLE_LINES)) { - if (!(offset == con.start - || offset > con.start && isEOLChar( target [ offset-1 ] ))) - return -1; - } else { - if (offset != con.start) - return -1; - } - break; - - case '@': // Internal use only. - // The @ always matches line beginnings. - if (!(offset == con.start - || offset > con.start && isEOLChar( target [ offset-1 ] ))) - return -1; - break; - - case '$': - if (isSet(opts, MULTIPLE_LINES)) { - if (!(offset == con.limit - || offset < con.limit && isEOLChar( target [ offset ] ))) - return -1; - } else { - if (!(offset == con.limit - || offset+1 == con.limit && isEOLChar( target [ offset ] ) - || offset+2 == con.limit && target [ offset ] == CARRIAGE_RETURN - && target [ offset+1 ] == LINE_FEED)) - return -1; - } - break; - - case 'A': - if (offset != con.start) return -1; - break; - - case 'Z': - if (!(offset == con.limit - || offset+1 == con.limit && isEOLChar( target [ offset ] ) - || offset+2 == con.limit && target [ offset ] == CARRIAGE_RETURN - && target [ offset+1 ] == LINE_FEED)) - return -1; - break; - - case 'z': - if (offset != con.limit) return -1; - break; - - case 'b': - if (con.length == 0) return -1; - { - int after = getWordType(target, con.start, con.limit, offset, opts); - if (after == WT_IGNORE) return -1; - int before = getPreviousWordType(target, con.start, con.limit, offset, opts); - if (after == before) return -1; - } - break; - - case 'B': - if (con.length == 0) - go = true; - else { - int after = getWordType(target, con.start, con.limit, offset, opts); - go = after == WT_IGNORE - || after == getPreviousWordType(target, con.start, con.limit, offset, opts); - } - if (!go) return -1; - break; - - case '<': - if (con.length == 0 || offset == con.limit) return -1; - if (getWordType(target, con.start, con.limit, offset, opts) != WT_LETTER - || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_OTHER) - return -1; - break; - - case '>': - if (con.length == 0 || offset == con.start) return -1; - if (getWordType(target, con.start, con.limit, offset, opts) != WT_OTHER - || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_LETTER) - return -1; - break; - } // switch anchor type - op = op.next; - break; - - case Op.BACKREFERENCE: - { - int refno = op.getData(); - if (refno <= 0 || refno >= this.nofparen) - throw new RuntimeException("Internal Error: Reference number must be more than zero: "+refno); - if (con.match.getBeginning(refno) < 0 - || con.match.getEnd(refno) < 0) - return -1; // ******** - int o2 = con.match.getBeginning(refno); - int literallen = con.match.getEnd(refno)-o2; - if (!isSet(opts, IGNORE_CASE)) { - if (dx > 0) { - if (!regionMatches(target, offset, con.limit, o2, literallen)) - return -1; - offset += literallen; - } else { - if (!regionMatches(target, offset-literallen, con.limit, o2, literallen)) - return -1; - offset -= literallen; - } - } else { - if (dx > 0) { - if (!regionMatchesIgnoreCase(target, offset, con.limit, o2, literallen)) - return -1; - offset += literallen; - } else { - if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit, - o2, literallen)) - return -1; - offset -= literallen; - } - } - } - op = op.next; - break; - case Op.STRING: - { - String literal = op.getString(); - int literallen = literal.length(); - if (!isSet(opts, IGNORE_CASE)) { - if (dx > 0) { - if (!regionMatches(target, offset, con.limit, literal, literallen)) - return -1; - offset += literallen; - } else { - if (!regionMatches(target, offset-literallen, con.limit, literal, literallen)) - return -1; - offset -= literallen; - } - } else { - if (dx > 0) { - if (!regionMatchesIgnoreCase(target, offset, con.limit, literal, literallen)) - return -1; - offset += literallen; - } else { - if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit, - literal, literallen)) - return -1; - offset -= literallen; - } - } - } - op = op.next; - break; - - case Op.CLOSURE: - { - /* - * Saves current position to avoid - * zero-width repeats. - */ - int id = op.getData(); - if (id >= 0) { - int previousOffset = con.offsets[id]; - if (previousOffset < 0 || previousOffset != offset) { - con.offsets[id] = offset; - } else { - con.offsets[id] = -1; - op = op.next; - break; - } - } - - int ret = this. matchCharArray (con, op.getChild(), offset, dx, opts); - if (id >= 0) con.offsets[id] = -1; - if (ret >= 0) return ret; - op = op.next; - } - break; - - case Op.QUESTION: - { - int ret = this. matchCharArray (con, op.getChild(), offset, dx, opts); - if (ret >= 0) return ret; - op = op.next; - } - break; - - case Op.NONGREEDYCLOSURE: - case Op.NONGREEDYQUESTION: - { - int ret = this. matchCharArray (con, op.next, offset, dx, opts); - if (ret >= 0) return ret; - op = op.getChild(); - } - break; - - case Op.UNION: - for (int i = 0; i < op.size(); i ++) { - int ret = this. matchCharArray (con, op.elementAt(i), offset, dx, opts); - if (DEBUG) { - System.err.println("UNION: "+i+", ret="+ret); - } - if (ret >= 0) return ret; - } - return -1; - - case Op.CAPTURE: - int refno = op.getData(); - if (con.match != null && refno > 0) { - int save = con.match.getBeginning(refno); - con.match.setBeginning(refno, offset); - int ret = this. matchCharArray (con, op.next, offset, dx, opts); - if (ret < 0) con.match.setBeginning(refno, save); - return ret; - } else if (con.match != null && refno < 0) { - int index = -refno; - int save = con.match.getEnd(index); - con.match.setEnd(index, offset); - int ret = this. matchCharArray (con, op.next, offset, dx, opts); - if (ret < 0) con.match.setEnd(index, save); - return ret; - } - op = op.next; - break; - - case Op.LOOKAHEAD: - if (0 > this. matchCharArray (con, op.getChild(), offset, 1, opts)) return -1; - op = op.next; - break; - case Op.NEGATIVELOOKAHEAD: - if (0 <= this. matchCharArray (con, op.getChild(), offset, 1, opts)) return -1; - op = op.next; - break; - case Op.LOOKBEHIND: - if (0 > this. matchCharArray (con, op.getChild(), offset, -1, opts)) return -1; - op = op.next; - break; - case Op.NEGATIVELOOKBEHIND: - if (0 <= this. matchCharArray (con, op.getChild(), offset, -1, opts)) return -1; - op = op.next; - break; - - case Op.INDEPENDENT: - { - int ret = this. matchCharArray (con, op.getChild(), offset, dx, opts); - if (ret < 0) return ret; - offset = ret; - op = op.next; - } - break; - - case Op.MODIFIER: - { - int localopts = opts; - localopts |= op.getData(); - localopts &= ~op.getData2(); - //System.err.println("MODIFIER: "+Integer.toString(opts, 16)+" -> "+Integer.toString(localopts, 16)); - int ret = this. matchCharArray (con, op.getChild(), offset, dx, localopts); - if (ret < 0) return ret; - offset = ret; - op = op.next; - } - break; - - case Op.CONDITION: - { - Op.ConditionOp cop = (Op.ConditionOp)op; - boolean matchp = false; - if (cop.refNumber > 0) { - if (cop.refNumber >= this.nofparen) - throw new RuntimeException("Internal Error: Reference number must be more than zero: "+cop.refNumber); - matchp = con.match.getBeginning(cop.refNumber) >= 0 - && con.match.getEnd(cop.refNumber) >= 0; - } else { - matchp = 0 <= this. matchCharArray (con, cop.condition, offset, dx, opts); - } - - if (matchp) { - op = cop.yes; - } else if (cop.no != null) { - op = cop.no; - } else { - op = cop.next; - } - } - break; - - default: - throw new RuntimeException("Unknown operation type: "+op.type); - } // switch (op.type) - } // while - } - - private static final int getPreviousWordType(char[] target, int begin, int end, - int offset, int opts) { - int ret = getWordType(target, begin, end, --offset, opts); - while (ret == WT_IGNORE) - ret = getWordType(target, begin, end, --offset, opts); - return ret; - } - - private static final int getWordType(char[] target, int begin, int end, - int offset, int opts) { - if (offset < begin || offset >= end) return WT_OTHER; - return getWordType0( target [ offset ] , opts); - } - - - - private static final boolean regionMatches(char[] target, int offset, int limit, - String part, int partlen) { - if (offset < 0) return false; - if (limit-offset < partlen) - return false; - int i = 0; - while (partlen-- > 0) { - if ( target [ offset++ ] != part.charAt(i++)) - return false; - } - return true; - } - - private static final boolean regionMatches(char[] target, int offset, int limit, - int offset2, int partlen) { - if (offset < 0) return false; - if (limit-offset < partlen) - return false; - int i = offset2; - while (partlen-- > 0) { - if ( target [ offset++ ] != target [ i++ ] ) - return false; - } - return true; - } - -/** - * @see java.lang.String#regionMatches - */ - private static final boolean regionMatchesIgnoreCase(char[] target, int offset, int limit, - String part, int partlen) { - if (offset < 0) return false; - if (limit-offset < partlen) - return false; - int i = 0; - while (partlen-- > 0) { - char ch1 = target [ offset++ ] ; - char ch2 = part.charAt(i++); - if (ch1 == ch2) - continue; - char uch1 = Character.toUpperCase(ch1); - char uch2 = Character.toUpperCase(ch2); - if (uch1 == uch2) - continue; - if (Character.toLowerCase(uch1) != Character.toLowerCase(uch2)) - return false; - } - return true; - } - - private static final boolean regionMatchesIgnoreCase(char[] target, int offset, int limit, - int offset2, int partlen) { - if (offset < 0) return false; - if (limit-offset < partlen) + con.setInUse(false); return false; - int i = offset2; - while (partlen-- > 0) { - char ch1 = target [ offset++ ] ; - char ch2 = target [ i++ ] ; - if (ch1 == ch2) - continue; - char uch1 = Character.toUpperCase(ch1); - char uch2 = Character.toUpperCase(ch2); - if (uch1 == uch2) - continue; - if (Character.toLowerCase(uch1) != Character.toLowerCase(uch2)) - return false; } - return true; } - - - /** * Checks whether the <var>target</var> text <strong>contains</strong> this pattern or not. * @@ -1435,7 +913,7 @@ public class RegularExpression implements java.io.Serializable { if (DEBUG) { System.err.println("target string="+target); } - int matchEnd = this. matchString (con, this.operations, con.start, 1, this.options); + int matchEnd = this. match(con, this.operations, con.start, 1, this.options); if (DEBUG) { System.err.println("matchEnd="+matchEnd); System.err.println("con.limit="+con.limit); @@ -1445,7 +923,7 @@ public class RegularExpression implements java.io.Serializable { con.match.setBeginning(0, con.start); con.match.setEnd(0, matchEnd); } - con.inuse = false; + con.setInUse(false); return true; } return false; @@ -1463,10 +941,10 @@ public class RegularExpression implements java.io.Serializable { con.match.setBeginning(0, o); con.match.setEnd(0, o+this.fixedString.length()); } - con.inuse = false; + con.setInUse(false); return true; } - con.inuse = false; + con.setInUse(false); return false; } @@ -1479,7 +957,7 @@ public class RegularExpression implements java.io.Serializable { int o = this.fixedStringTable.matches(target, con.start, con.limit); if (o < 0) { //System.err.println("Non-match in fixed-string search."); - con.inuse = false; + con.setInUse(false); return false; } } @@ -1495,7 +973,7 @@ public class RegularExpression implements java.io.Serializable { && this.operations.type == Op.CLOSURE && this.operations.getChild().type == Op.DOT) { if (isSet(this.options, SINGLE_LINE)) { matchStart = con.start; - matchEnd = this. matchString (con, this.operations, con.start, 1, this.options); + matchEnd = this.match(con, this.operations, con.start, 1, this.options); } else { boolean previousIsEOL = true; for (matchStart = con.start; matchStart <= limit; matchStart ++) { @@ -1504,8 +982,8 @@ public class RegularExpression implements java.io.Serializable { previousIsEOL = true; } else { if (previousIsEOL) { - if (0 <= (matchEnd = this. matchString (con, this.operations, - matchStart, 1, this.options))) + if (0 <= (matchEnd = this.match(con, this.operations, + matchStart, 1, this.options))) break; } previousIsEOL = false; @@ -1520,35 +998,18 @@ public class RegularExpression implements java.io.Serializable { else if (this.firstChar != null) { //System.err.println("DEBUG: with firstchar-matching: "+this.firstChar); RangeToken range = this.firstChar; - if (RegularExpression.isSet(this.options, IGNORE_CASE)) { - range = this.firstChar.getCaseInsensitiveToken(); - for (matchStart = con.start; matchStart <= limit; matchStart ++) { - int ch = target .charAt( matchStart ) ; - if (REUtil.isHighSurrogate(ch) && matchStart+1 < con.limit) { - ch = REUtil.composeFromSurrogates(ch, target .charAt( matchStart+1 ) ); - if (!range.match(ch)) continue; - } else { - if (!range.match(ch)) { - char ch1 = Character.toUpperCase((char)ch); - if (!range.match(ch1)) - if (!range.match(Character.toLowerCase(ch1))) - continue; - } - } - if (0 <= (matchEnd = this. matchString (con, this.operations, - matchStart, 1, this.options))) - break; + for (matchStart = con.start; matchStart <= limit; matchStart ++) { + int ch = target .charAt( matchStart ) ; + if (REUtil.isHighSurrogate(ch) && matchStart+1 < con.limit) { + ch = REUtil.composeFromSurrogates(ch, target.charAt(matchStart+1)); } - } else { - for (matchStart = con.start; matchStart <= limit; matchStart ++) { - int ch = target .charAt( matchStart ) ; - if (REUtil.isHighSurrogate(ch) && matchStart+1 < con.limit) - ch = REUtil.composeFromSurrogates(ch, target .charAt( matchStart+1 ) ); - if (!range.match(ch)) continue; - if (0 <= (matchEnd = this. matchString (con, this.operations, - matchStart, 1, this.options))) - break; + if (!range.match(ch)) { + continue; } + if (0 <= (matchEnd = this.match(con, this.operations, + matchStart, 1, this.options))) { + break; + } } } @@ -1557,7 +1018,7 @@ public class RegularExpression implements java.io.Serializable { */ else { for (matchStart = con.start; matchStart <= limit; matchStart ++) { - if (0 <= (matchEnd = this. matchString (con, this.operations, matchStart, 1, this.options))) + if (0 <= (matchEnd = this.match(con, this.operations, matchStart, 1, this.options))) break; } } @@ -1567,10 +1028,10 @@ public class RegularExpression implements java.io.Serializable { con.match.setBeginning(0, matchStart); con.match.setEnd(0, matchEnd); } - con.inuse = false; + con.setInUse(false); return true; } else { - con.inuse = false; + con.setInUse(false); return false; } } @@ -1578,466 +1039,516 @@ public class RegularExpression implements java.io.Serializable { /** * @return -1 when not match; offset of the end of matched string when match. */ - private int matchString (Context con, Op op, int offset, int dx, int opts) { - - - - - String target = con.strTarget; - - - - - while (true) { - if (op == null) - return isSet(opts, XMLSCHEMA_MODE) && offset != con.limit ? -1 : offset; - if (offset > con.limit || offset < con.start) - return -1; - switch (op.type) { - case Op.CHAR: - if (isSet(opts, IGNORE_CASE)) { - int ch = op.getData(); - if (dx > 0) { - if (offset >= con.limit || !matchIgnoreCase(ch, target .charAt( offset ) )) - return -1; - offset ++; - } else { - int o1 = offset-1; - if (o1 >= con.limit || o1 < 0 || !matchIgnoreCase(ch, target .charAt( o1 ) )) - return -1; - offset = o1; - } - } else { - int ch = op.getData(); - if (dx > 0) { - if (offset >= con.limit || ch != target .charAt( offset ) ) - return -1; - offset ++; - } else { - int o1 = offset-1; - if (o1 >= con.limit || o1 < 0 || ch != target .charAt( o1 ) ) - return -1; - offset = o1; - } + private int match(Context con, Op op, int offset, int dx, int opts) { + final ExpressionTarget target = con.target; + final Stack opStack = new Stack(); + final IntStack dataStack = new IntStack(); + final boolean isSetIgnoreCase = isSet(opts, IGNORE_CASE); + int retValue = -1; + boolean returned = false; + + for (;;) { + if (op == null || offset > con.limit || offset < con.start) { + if (op == null) { + retValue = isSet(opts, XMLSCHEMA_MODE) && offset != con.limit ? -1 : offset; } - op = op.next; - break; - - case Op.DOT: - if (dx > 0) { - if (offset >= con.limit) - return -1; - int ch = target .charAt( offset ) ; - if (isSet(opts, SINGLE_LINE)) { - if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) - offset ++; - } else { - if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) - ch = REUtil.composeFromSurrogates(ch, target .charAt( ++offset ) ); - if (isEOLChar(ch)) - return -1; - } - offset ++; - } else { - int o1 = offset-1; - if (o1 >= con.limit || o1 < 0) - return -1; - int ch = target .charAt( o1 ) ; - if (isSet(opts, SINGLE_LINE)) { - if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) - o1 --; - } else { - if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) - ch = REUtil.composeFromSurrogates( target .charAt( --o1 ) , ch); - if (!isEOLChar(ch)) - return -1; - } - offset = o1; + else { + retValue = -1; } - op = op.next; - break; - - case Op.RANGE: - case Op.NRANGE: - if (dx > 0) { - if (offset >= con.limit) - return -1; - int ch = target .charAt( offset ) ; - if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) - ch = REUtil.composeFromSurrogates(ch, target .charAt( ++offset ) ); - RangeToken tok = op.getToken(); - if (isSet(opts, IGNORE_CASE)) { - tok = tok.getCaseInsensitiveToken(); - if (!tok.match(ch)) { - if (ch >= 0x10000) return -1; - char uch; - if (!tok.match(uch = Character.toUpperCase((char)ch)) - && !tok.match(Character.toLowerCase(uch))) - return -1; - } - } else { - if (!tok.match(ch)) return -1; - } - offset ++; - } else { - int o1 = offset-1; - if (o1 >= con.limit || o1 < 0) - return -1; - int ch = target .charAt( o1 ) ; - if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) - ch = REUtil.composeFromSurrogates( target .charAt( --o1 ) , ch); - RangeToken tok = op.getToken(); - if (isSet(opts, IGNORE_CASE)) { - tok = tok.getCaseInsensitiveToken(); - if (!tok.match(ch)) { - if (ch >= 0x10000) return -1; - char uch; - if (!tok.match(uch = Character.toUpperCase((char)ch)) - && !tok.match(Character.toLowerCase(uch))) - return -1; + returned = true; + } + else { + retValue = -1; + // dx value is either 1 or -1 + switch (op.type) { + case Op.CHAR: + { + final int o1 = (dx > 0) ? offset : offset -1; + if (o1 >= con.limit || o1 < 0 || !matchChar(op.getData(), target.charAt(o1), isSetIgnoreCase)) { + returned = true; + break; } - } else { - if (!tok.match(ch)) return -1; - } - offset = o1; - } - op = op.next; - break; - - case Op.ANCHOR: - boolean go = false; - switch (op.getData()) { - case '^': - if (isSet(opts, MULTIPLE_LINES)) { - if (!(offset == con.start - || offset > con.start && isEOLChar( target .charAt( offset-1 ) ))) - return -1; - } else { - if (offset != con.start) - return -1; + offset += dx; + op = op.next; } break; - case '@': // Internal use only. - // The @ always matches line beginnings. - if (!(offset == con.start - || offset > con.start && isEOLChar( target .charAt( offset-1 ) ))) - return -1; - break; - - case '$': - if (isSet(opts, MULTIPLE_LINES)) { - if (!(offset == con.limit - || offset < con.limit && isEOLChar( target .charAt( offset ) ))) - return -1; - } else { - if (!(offset == con.limit - || offset+1 == con.limit && isEOLChar( target .charAt( offset ) ) - || offset+2 == con.limit && target .charAt( offset ) == CARRIAGE_RETURN - && target .charAt( offset+1 ) == LINE_FEED)) - return -1; + case Op.DOT: + { + int o1 = (dx > 0) ? offset : offset - 1; + if (o1 >= con.limit || o1 < 0) { + returned = true; + break; + } + if (isSet(opts, SINGLE_LINE)) { + if (REUtil.isHighSurrogate(target.charAt(o1)) && o1+dx >= 0 && o1+dx < con.limit) { + o1 += dx; + } + } + else { + int ch = target.charAt(o1); + if (REUtil.isHighSurrogate(ch) && o1+dx >= 0 && o1+dx < con.limit) { + o1 += dx; + ch = REUtil.composeFromSurrogates(ch, target.charAt(o1)); + } + if (isEOLChar(ch)) { + returned = true; + break; + } + } + offset = (dx > 0) ? o1 + 1 : o1; + op = op.next; } break; - case 'A': - if (offset != con.start) return -1; - break; - - case 'Z': - if (!(offset == con.limit - || offset+1 == con.limit && isEOLChar( target .charAt( offset ) ) - || offset+2 == con.limit && target .charAt( offset ) == CARRIAGE_RETURN - && target .charAt( offset+1 ) == LINE_FEED)) - return -1; - break; - - case 'z': - if (offset != con.limit) return -1; - break; - - case 'b': - if (con.length == 0) return -1; + case Op.RANGE: + case Op.NRANGE: { - int after = getWordType(target, con.start, con.limit, offset, opts); - if (after == WT_IGNORE) return -1; - int before = getPreviousWordType(target, con.start, con.limit, offset, opts); - if (after == before) return -1; + int o1 = (dx > 0) ? offset : offset -1; + if (o1 >= con.limit || o1 < 0) { + returned = true; + break; + } + int ch = target.charAt(offset); + if (REUtil.isHighSurrogate(ch) && o1+dx < con.limit && o1+dx >=0) { + o1 += dx; + ch = REUtil.composeFromSurrogates(ch, target.charAt(o1)); + } + final RangeToken tok = op.getToken(); + if (!tok.match(ch)) { + returned = true; + break; + } + offset = (dx > 0) ? o1+1 : o1; + op = op.next; } break; - case 'B': - if (con.length == 0) - go = true; - else { - int after = getWordType(target, con.start, con.limit, offset, opts); - go = after == WT_IGNORE - || after == getPreviousWordType(target, con.start, con.limit, offset, opts); + case Op.ANCHOR: + { + if (!matchAnchor(target, op, con, offset, opts)) { + returned = true; + break; + } + op = op.next; } - if (!go) return -1; break; - case '<': - if (con.length == 0 || offset == con.limit) return -1; - if (getWordType(target, con.start, con.limit, offset, opts) != WT_LETTER - || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_OTHER) - return -1; - break; - - case '>': - if (con.length == 0 || offset == con.start) return -1; - if (getWordType(target, con.start, con.limit, offset, opts) != WT_OTHER - || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_LETTER) - return -1; - break; - } // switch anchor type - op = op.next; - break; - - case Op.BACKREFERENCE: - { - int refno = op.getData(); - if (refno <= 0 || refno >= this.nofparen) - throw new RuntimeException("Internal Error: Reference number must be more than zero: "+refno); - if (con.match.getBeginning(refno) < 0 - || con.match.getEnd(refno) < 0) - return -1; // ******** - int o2 = con.match.getBeginning(refno); - int literallen = con.match.getEnd(refno)-o2; - if (!isSet(opts, IGNORE_CASE)) { - if (dx > 0) { - if (!regionMatches(target, offset, con.limit, o2, literallen)) - return -1; - offset += literallen; - } else { - if (!regionMatches(target, offset-literallen, con.limit, o2, literallen)) - return -1; - offset -= literallen; + case Op.BACKREFERENCE: + { + int refno = op.getData(); + if (refno <= 0 || refno >= this.nofparen) { + throw new RuntimeException("Internal Error: Reference number must be more than zero: "+refno); } - } else { + if (con.match.getBeginning(refno) < 0 || con.match.getEnd(refno) < 0) { + returned = true; + break; + } + int o2 = con.match.getBeginning(refno); + int literallen = con.match.getEnd(refno)-o2; if (dx > 0) { - if (!regionMatchesIgnoreCase(target, offset, con.limit, o2, literallen)) - return -1; + if (!target.regionMatches(isSetIgnoreCase, offset, con.limit, o2, literallen)) { + returned = true; + break; + } offset += literallen; - } else { - if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit, - o2, literallen)) - return -1; + } + else { + if (!target.regionMatches(isSetIgnoreCase, offset-literallen, con.limit, o2, literallen)) { + returned = true; + break; + } offset -= literallen; } + op = op.next; } - } - op = op.next; - break; - case Op.STRING: - { - String literal = op.getString(); - int literallen = literal.length(); - if (!isSet(opts, IGNORE_CASE)) { + break; + + case Op.STRING: + { + String literal = op.getString(); + int literallen = literal.length(); if (dx > 0) { - if (!regionMatches(target, offset, con.limit, literal, literallen)) - return -1; + if (!target.regionMatches(isSetIgnoreCase, offset, con.limit, literal, literallen)) { + returned = true; + break; + } offset += literallen; - } else { - if (!regionMatches(target, offset-literallen, con.limit, literal, literallen)) - return -1; - offset -= literallen; } - } else { - if (dx > 0) { - if (!regionMatchesIgnoreCase(target, offset, con.limit, literal, literallen)) - return -1; - offset += literallen; - } else { - if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit, - literal, literallen)) - return -1; + else { + if (!target.regionMatches(isSetIgnoreCase, offset-literallen, con.limit, literal, literallen)) { + returned = true; + break; + } offset -= literallen; } + op = op.next; } - } - op = op.next; - break; + break; - case Op.CLOSURE: - { - /* - * Saves current position to avoid - * zero-width repeats. - */ - int id = op.getData(); - if (id >= 0) { + case Op.CLOSURE: + { + // Saves current position to avoid zero-width repeats. + final int id = op.getData(); int previousOffset = con.offsets[id]; - if (previousOffset < 0 || previousOffset != offset) { - con.offsets[id] = offset; - } else { - con.offsets[id] = -1; + if (previousOffset == offset) { + returned = true; + break; + } + con.offsets[id] = offset; + if (offset < previousOffset) { op = op.next; break; } } - int ret = this. matchString (con, op.getChild(), offset, dx, opts); - if (id >= 0) con.offsets[id] = -1; - if (ret >= 0) return ret; - op = op.next; - } - break; + // fall through - case Op.QUESTION: - { - int ret = this. matchString (con, op.getChild(), offset, dx, opts); - if (ret >= 0) return ret; - op = op.next; - } - break; + case Op.QUESTION: + { + opStack.push(op); + dataStack.push(offset); + op = op.getChild(); + } + break; - case Op.NONGREEDYCLOSURE: - case Op.NONGREEDYQUESTION: - { - int ret = this. matchString (con, op.next, offset, dx, opts); - if (ret >= 0) return ret; - op = op.getChild(); - } - break; + case Op.NONGREEDYCLOSURE: + case Op.NONGREEDYQUESTION: + { + opStack.push(op); + dataStack.push(offset); + op = op.next; + } + break; - case Op.UNION: - for (int i = 0; i < op.size(); i ++) { - int ret = this. matchString (con, op.elementAt(i), offset, dx, opts); - if (DEBUG) { - System.err.println("UNION: "+i+", ret="+ret); + case Op.UNION: + if (op.size() == 0) { + returned = true; } - if (ret >= 0) return ret; - } - return -1; - - case Op.CAPTURE: - int refno = op.getData(); - if (con.match != null && refno > 0) { - int save = con.match.getBeginning(refno); - con.match.setBeginning(refno, offset); - int ret = this. matchString (con, op.next, offset, dx, opts); - if (ret < 0) con.match.setBeginning(refno, save); - return ret; - } else if (con.match != null && refno < 0) { - int index = -refno; - int save = con.match.getEnd(index); - con.match.setEnd(index, offset); - int ret = this. matchString (con, op.next, offset, dx, opts); - if (ret < 0) con.match.setEnd(index, save); - return ret; - } - op = op.next; - break; + else { + opStack.push(op); + dataStack.push(0); + dataStack.push(offset); + op = op.elementAt(0); + } + break; - case Op.LOOKAHEAD: - if (0 > this. matchString (con, op.getChild(), offset, 1, opts)) return -1; - op = op.next; - break; - case Op.NEGATIVELOOKAHEAD: - if (0 <= this. matchString (con, op.getChild(), offset, 1, opts)) return -1; - op = op.next; - break; - case Op.LOOKBEHIND: - if (0 > this. matchString (con, op.getChild(), offset, -1, opts)) return -1; - op = op.next; - break; - case Op.NEGATIVELOOKBEHIND: - if (0 <= this. matchString (con, op.getChild(), offset, -1, opts)) return -1; - op = op.next; - break; + case Op.CAPTURE: + { + final int refno = op.getData(); + if (con.match != null) { + if (refno > 0) { + dataStack.push(con.match.getBeginning(refno)); + con.match.setBeginning(refno, offset); + } + else { + final int index = -refno; + dataStack.push(con.match.getEnd(index)); + con.match.setEnd(index, offset); + } + opStack.push(op); + dataStack.push(offset); + } + op = op.next; + } + break; - case Op.INDEPENDENT: - { - int ret = this. matchString (con, op.getChild(), offset, dx, opts); - if (ret < 0) return ret; - offset = ret; - op = op.next; + case Op.LOOKAHEAD: + case Op.NEGATIVELOOKAHEAD: + case Op.LOOKBEHIND: + case Op.NEGATIVELOOKBEHIND: + { + opStack.push(op); + dataStack.push(dx); + dataStack.push(offset); + dx = (op.type == Op.LOOKAHEAD || op.type == Op.NEGATIVELOOKAHEAD) ? 1 : -1; + op = op.getChild(); + } + break; + + case Op.INDEPENDENT: + { + opStack.push(op); + dataStack.push(offset); + op = op.getChild(); + } + break; + + case Op.MODIFIER: + { + int localopts = opts; + localopts |= op.getData(); + localopts &= ~op.getData2(); + opStack.push(op); + dataStack.push(opts); + dataStack.push(offset); + opts = localopts; + op = op.getChild(); + } + break; + + case Op.CONDITION: + { + Op.ConditionOp cop = (Op.ConditionOp)op; + if (cop.refNumber > 0) { + if (cop.refNumber >= this.nofparen) { + throw new RuntimeException("Internal Error: Reference number must be more than zero: "+cop.refNumber); + } + if (con.match.getBeginning(cop.refNumber) >= 0 + && con.match.getEnd(cop.refNumber) >= 0) { + op = cop.yes; + } + else if (cop.no != null) { + op = cop.no; + } + else { + op = cop.next; + } + } + else { + opStack.push(op); + dataStack.push(offset); + op = cop.condition; + } + } + break; + + default: + throw new RuntimeException("Unknown operation type: " + op.type); } - break; + } - case Op.MODIFIER: - { - int localopts = opts; - localopts |= op.getData(); - localopts &= ~op.getData2(); - //System.err.println("MODIFIER: "+Integer.toString(opts, 16)+" -> "+Integer.toString(localopts, 16)); - int ret = this. matchString (con, op.getChild(), offset, dx, localopts); - if (ret < 0) return ret; - offset = ret; - op = op.next; + // handle recursive operations + while (returned) { + // exhausted all the operations + if (opStack.isEmpty()) { + return retValue; } - break; - case Op.CONDITION: - { - Op.ConditionOp cop = (Op.ConditionOp)op; - boolean matchp = false; - if (cop.refNumber > 0) { - if (cop.refNumber >= this.nofparen) - throw new RuntimeException("Internal Error: Reference number must be more than zero: "+cop.refNumber); - matchp = con.match.getBeginning(cop.refNumber) >= 0 - && con.match.getEnd(cop.refNumber) >= 0; - } else { - matchp = 0 <= this. matchString (con, cop.condition, offset, dx, opts); + op = (Op) opStack.pop(); + offset = dataStack.pop(); + + switch (op.type) { + case Op.CLOSURE: + con.offsets[op.getData()] = offset; + // fall through - same behavior as Op.Question + + case Op.QUESTION: + if (retValue < 0) { + op = op.next; + returned = false; } + break; - if (matchp) { - op = cop.yes; - } else if (cop.no != null) { - op = cop.no; - } else { - op = cop.next; + case Op.NONGREEDYCLOSURE: + case Op.NONGREEDYQUESTION: + if (retValue < 0) { + op = op.getChild(); + returned = false; } - } - break; + break; - default: - throw new RuntimeException("Unknown operation type: "+op.type); - } // switch (op.type) - } // while - } + case Op.UNION: + { + int unionIndex = dataStack.pop(); + if (DEBUG) { + System.err.println("UNION: "+unionIndex+", ret="+retValue); + } - private static final int getPreviousWordType(String target, int begin, int end, - int offset, int opts) { - int ret = getWordType(target, begin, end, --offset, opts); - while (ret == WT_IGNORE) - ret = getWordType(target, begin, end, --offset, opts); - return ret; - } + if (retValue < 0) { + if (++unionIndex < op.size()) { + opStack.push(op); + dataStack.push(unionIndex); + dataStack.push(offset); + op = op.elementAt(unionIndex); + returned = false; + } + else { + retValue = -1; + } + } + } + break; - private static final int getWordType(String target, int begin, int end, - int offset, int opts) { - if (offset < begin || offset >= end) return WT_OTHER; - return getWordType0( target .charAt( offset ) , opts); - } + case Op.CAPTURE: + final int refno = op.getData(); + final int saved = dataStack.pop(); + if (retValue < 0) { + if (refno > 0) { + con.match.setBeginning(refno, saved); + } + else { + con.match.setEnd(-refno, saved); + } + } + break; + + case Op.LOOKAHEAD: + case Op.LOOKBEHIND: + { + dx = dataStack.pop(); + if (0 <= retValue) { + op = op.next; + returned = false; + } + retValue = -1; + } + break; + case Op.NEGATIVELOOKAHEAD: + case Op.NEGATIVELOOKBEHIND: + { + dx = dataStack.pop(); + if (0 > retValue) { + op = op.next; + returned = false; + } + retValue = -1; + } + break; - private static final boolean regionMatches(String text, int offset, int limit, - String part, int partlen) { - if (limit-offset < partlen) return false; - return text.regionMatches(offset, part, 0, partlen); - } + case Op.MODIFIER: + opts = dataStack.pop(); + // fall through - private static final boolean regionMatches(String text, int offset, int limit, - int offset2, int partlen) { - if (limit-offset < partlen) return false; - return text.regionMatches(offset, text, offset2, partlen); - } + case Op.INDEPENDENT: + if (retValue >= 0) { + offset = retValue; + op = op.next; + returned = false; + } + break; + + case Op.CONDITION: + { + final Op.ConditionOp cop = (Op.ConditionOp)op; + if (0 <= retValue) { + op = cop.yes; + } + else if (cop.no != null) { + op = cop.no; + } + else { + op = cop.next; + } + } + returned = false; + break; - private static final boolean regionMatchesIgnoreCase(String text, int offset, int limit, - String part, int partlen) { - return text.regionMatches(true, offset, part, 0, partlen); + default: + break; + } + } + } } - private static final boolean regionMatchesIgnoreCase(String text, int offset, int limit, - int offset2, int partlen) { - if (limit-offset < partlen) return false; - return text.regionMatches(true, offset, text, offset2, partlen); + private boolean matchChar(int ch, int other, boolean ignoreCase) { + return (ignoreCase) ? matchIgnoreCase(ch, other) : ch == other; } + boolean matchAnchor(ExpressionTarget target, Op op, Context con, int offset, int opts) { + boolean go = false; + switch (op.getData()) { + case '^': + if (isSet(opts, MULTIPLE_LINES)) { + if (!(offset == con.start + || offset > con.start && offset < con.limit && isEOLChar(target.charAt(offset-1)))) + return false; + } else { + if (offset != con.start) + return false; + } + break; + + case '@': // Internal use only. + // The @ always matches line beginnings. + if (!(offset == con.start + || offset > con.start && isEOLChar(target.charAt(offset-1)))) + return false; + break; + + case '$': + if (isSet(opts, MULTIPLE_LINES)) { + if (!(offset == con.limit + || offset < con.limit && isEOLChar(target.charAt(offset)))) + return false; + } else { + if (!(offset == con.limit + || offset+1 == con.limit && isEOLChar(target.charAt(offset)) + || offset+2 == con.limit && target.charAt(offset) == CARRIAGE_RETURN + && target.charAt(offset+1) == LINE_FEED)) + return false; + } + break; + + case 'A': + if (offset != con.start) return false; + break; + + case 'Z': + if (!(offset == con.limit + || offset+1 == con.limit && isEOLChar(target.charAt(offset)) + || offset+2 == con.limit && target.charAt(offset) == CARRIAGE_RETURN + && target.charAt(offset+1) == LINE_FEED)) + return false; + break; + + case 'z': + if (offset != con.limit) return false; + break; + + case 'b': + if (con.length == 0) + return false; + { + int after = getWordType(target, con.start, con.limit, offset, opts); + if (after == WT_IGNORE) return false; + int before = getPreviousWordType(target, con.start, con.limit, offset, opts); + if (after == before) return false; + } + break; + case 'B': + if (con.length == 0) + go = true; + else { + int after = getWordType(target, con.start, con.limit, offset, opts); + go = after == WT_IGNORE + || after == getPreviousWordType(target, con.start, con.limit, offset, opts); + } + if (!go) return false; + break; + case '<': + if (con.length == 0 || offset == con.limit) return false; + if (getWordType(target, con.start, con.limit, offset, opts) != WT_LETTER + || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_OTHER) + return false; + break; + case '>': + if (con.length == 0 || offset == con.start) return false; + if (getWordType(target, con.start, con.limit, offset, opts) != WT_OTHER + || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_LETTER) + return false; + break; + } // switch anchor type + + return true; + } + private static final int getPreviousWordType(ExpressionTarget target, int begin, int end, + int offset, int opts) { + int ret = getWordType(target, begin, end, --offset, opts); + while (ret == WT_IGNORE) + ret = getWordType(target, begin, end, --offset, opts); + return ret; + } + + private static final int getWordType(ExpressionTarget target, int begin, int end, + int offset, int opts) { + if (offset < begin || offset >= end) return WT_OTHER; + return getWordType0(target.charAt(offset) , opts); + } /** @@ -2085,14 +1596,14 @@ public class RegularExpression implements java.io.Serializable { con.match = match; if (RegularExpression.isSet(this.options, XMLSCHEMA_MODE)) { - int matchEnd = this. matchCharacterIterator (con, this.operations, con.start, 1, this.options); + int matchEnd = this.match(con, this.operations, con.start, 1, this.options); //System.err.println("DEBUG: matchEnd="+matchEnd); if (matchEnd == con.limit) { if (con.match != null) { con.match.setBeginning(0, con.start); con.match.setEnd(0, matchEnd); } - con.inuse = false; + con.setInUse(false); return true; } return false; @@ -2110,10 +1621,10 @@ public class RegularExpression implements java.io.Serializable { con.match.setBeginning(0, o); con.match.setEnd(0, o+this.fixedString.length()); } - con.inuse = false; + con.setInUse(false); return true; } - con.inuse = false; + con.setInUse(false); return false; } @@ -2126,7 +1637,7 @@ public class RegularExpression implements java.io.Serializable { int o = this.fixedStringTable.matches(target, con.start, con.limit); if (o < 0) { //System.err.println("Non-match in fixed-string search."); - con.inuse = false; + con.setInUse(false); return false; } } @@ -2142,7 +1653,7 @@ public class RegularExpression implements java.io.Serializable { && this.operations.type == Op.CLOSURE && this.operations.getChild().type == Op.DOT) { if (isSet(this.options, SINGLE_LINE)) { matchStart = con.start; - matchEnd = this. matchCharacterIterator (con, this.operations, con.start, 1, this.options); + matchEnd = this.match(con, this.operations, con.start, 1, this.options); } else { boolean previousIsEOL = true; for (matchStart = con.start; matchStart <= limit; matchStart ++) { @@ -2151,8 +1662,8 @@ public class RegularExpression implements java.io.Serializable { previousIsEOL = true; } else { if (previousIsEOL) { - if (0 <= (matchEnd = this. matchCharacterIterator (con, this.operations, - matchStart, 1, this.options))) + if (0 <= (matchEnd = this.match(con, this.operations, + matchStart, 1, this.options))) break; } previousIsEOL = false; @@ -2167,34 +1678,17 @@ public class RegularExpression implements java.io.Serializable { else if (this.firstChar != null) { //System.err.println("DEBUG: with firstchar-matching: "+this.firstChar); RangeToken range = this.firstChar; - if (RegularExpression.isSet(this.options, IGNORE_CASE)) { - range = this.firstChar.getCaseInsensitiveToken(); - for (matchStart = con.start; matchStart <= limit; matchStart ++) { - int ch = target .setIndex( matchStart ) ; - if (REUtil.isHighSurrogate(ch) && matchStart+1 < con.limit) { - ch = REUtil.composeFromSurrogates(ch, target .setIndex( matchStart+1 ) ); - if (!range.match(ch)) continue; - } else { - if (!range.match(ch)) { - char ch1 = Character.toUpperCase((char)ch); - if (!range.match(ch1)) - if (!range.match(Character.toLowerCase(ch1))) - continue; - } - } - if (0 <= (matchEnd = this. matchCharacterIterator (con, this.operations, - matchStart, 1, this.options))) - break; + for (matchStart = con.start; matchStart <= limit; matchStart ++) { + int ch = target .setIndex( matchStart ) ; + if (REUtil.isHighSurrogate(ch) && matchStart+1 < con.limit) { + ch = REUtil.composeFromSurrogates(ch, target.setIndex(matchStart+1)); } - } else { - for (matchStart = con.start; matchStart <= limit; matchStart ++) { - int ch = target .setIndex( matchStart ) ; - if (REUtil.isHighSurrogate(ch) && matchStart+1 < con.limit) - ch = REUtil.composeFromSurrogates(ch, target .setIndex( matchStart+1 ) ); - if (!range.match(ch)) continue; - if (0 <= (matchEnd = this. matchCharacterIterator (con, this.operations, - matchStart, 1, this.options))) - break; + if (!range.match(ch)) { + continue; + } + if (0 <= (matchEnd = this.match(con, this.operations, + matchStart, 1, this.options))) { + break; } } } @@ -2204,7 +1698,7 @@ public class RegularExpression implements java.io.Serializable { */ else { for (matchStart = con.start; matchStart <= limit; matchStart ++) { - if (0 <= (matchEnd = this. matchCharacterIterator (con, this.operations, matchStart, 1, this.options))) + if (0 <= (matchEnd = this. match(con, this.operations, matchStart, 1, this.options))) break; } } @@ -2214,603 +1708,343 @@ public class RegularExpression implements java.io.Serializable { con.match.setBeginning(0, matchStart); con.match.setEnd(0, matchEnd); } - con.inuse = false; + con.setInUse(false); return true; } else { - con.inuse = false; + con.setInUse(false); return false; } } + // ================================================================ + /** - * @return -1 when not match; offset of the end of matched string when match. + * A regular expression. + * @serial */ - private int matchCharacterIterator (Context con, Op op, int offset, int dx, int opts) { - - - CharacterIterator target = con.ciTarget; - - - - - - - while (true) { - if (op == null) - return isSet(opts, XMLSCHEMA_MODE) && offset != con.limit ? -1 : offset; - if (offset > con.limit || offset < con.start) - return -1; - switch (op.type) { - case Op.CHAR: - if (isSet(opts, IGNORE_CASE)) { - int ch = op.getData(); - if (dx > 0) { - if (offset >= con.limit || !matchIgnoreCase(ch, target .setIndex( offset ) )) - return -1; - offset ++; - } else { - int o1 = offset-1; - if (o1 >= con.limit || o1 < 0 || !matchIgnoreCase(ch, target .setIndex( o1 ) )) - return -1; - offset = o1; - } - } else { - int ch = op.getData(); - if (dx > 0) { - if (offset >= con.limit || ch != target .setIndex( offset ) ) - return -1; - offset ++; - } else { - int o1 = offset-1; - if (o1 >= con.limit || o1 < 0 || ch != target .setIndex( o1 ) ) - return -1; - offset = o1; - } - } - op = op.next; - break; - - case Op.DOT: - if (dx > 0) { - if (offset >= con.limit) - return -1; - int ch = target .setIndex( offset ) ; - if (isSet(opts, SINGLE_LINE)) { - if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) - offset ++; - } else { - if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) - ch = REUtil.composeFromSurrogates(ch, target .setIndex( ++offset ) ); - if (isEOLChar(ch)) - return -1; - } - offset ++; - } else { - int o1 = offset-1; - if (o1 >= con.limit || o1 < 0) - return -1; - int ch = target .setIndex( o1 ) ; - if (isSet(opts, SINGLE_LINE)) { - if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) - o1 --; - } else { - if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) - ch = REUtil.composeFromSurrogates( target .setIndex( --o1 ) , ch); - if (!isEOLChar(ch)) - return -1; - } - offset = o1; - } - op = op.next; - break; - - case Op.RANGE: - case Op.NRANGE: - if (dx > 0) { - if (offset >= con.limit) - return -1; - int ch = target .setIndex( offset ) ; - if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) - ch = REUtil.composeFromSurrogates(ch, target .setIndex( ++offset ) ); - RangeToken tok = op.getToken(); - if (isSet(opts, IGNORE_CASE)) { - tok = tok.getCaseInsensitiveToken(); - if (!tok.match(ch)) { - if (ch >= 0x10000) return -1; - char uch; - if (!tok.match(uch = Character.toUpperCase((char)ch)) - && !tok.match(Character.toLowerCase(uch))) - return -1; - } - } else { - if (!tok.match(ch)) return -1; - } - offset ++; - } else { - int o1 = offset-1; - if (o1 >= con.limit || o1 < 0) - return -1; - int ch = target .setIndex( o1 ) ; - if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) - ch = REUtil.composeFromSurrogates( target .setIndex( --o1 ) , ch); - RangeToken tok = op.getToken(); - if (isSet(opts, IGNORE_CASE)) { - tok = tok.getCaseInsensitiveToken(); - if (!tok.match(ch)) { - if (ch >= 0x10000) return -1; - char uch; - if (!tok.match(uch = Character.toUpperCase((char)ch)) - && !tok.match(Character.toLowerCase(uch))) - return -1; - } - } else { - if (!tok.match(ch)) return -1; - } - offset = o1; - } - op = op.next; - break; - - case Op.ANCHOR: - boolean go = false; - switch (op.getData()) { - case '^': - if (isSet(opts, MULTIPLE_LINES)) { - if (!(offset == con.start - || offset > con.start && isEOLChar( target .setIndex( offset-1 ) ))) - return -1; - } else { - if (offset != con.start) - return -1; - } - break; - - case '@': // Internal use only. - // The @ always matches line beginnings. - if (!(offset == con.start - || offset > con.start && isEOLChar( target .setIndex( offset-1 ) ))) - return -1; - break; + String regex; + /** + * @serial + */ + int options; - case '$': - if (isSet(opts, MULTIPLE_LINES)) { - if (!(offset == con.limit - || offset < con.limit && isEOLChar( target .setIndex( offset ) ))) - return -1; - } else { - if (!(offset == con.limit - || offset+1 == con.limit && isEOLChar( target .setIndex( offset ) ) - || offset+2 == con.limit && target .setIndex( offset ) == CARRIAGE_RETURN - && target .setIndex( offset+1 ) == LINE_FEED)) - return -1; - } - break; + /** + * The number of parenthesis in the regular expression. + * @serial + */ + int nofparen; + /** + * Internal representation of the regular expression. + * @serial + */ + Token tokentree; - case 'A': - if (offset != con.start) return -1; - break; + boolean hasBackReferences = false; - case 'Z': - if (!(offset == con.limit - || offset+1 == con.limit && isEOLChar( target .setIndex( offset ) ) - || offset+2 == con.limit && target .setIndex( offset ) == CARRIAGE_RETURN - && target .setIndex( offset+1 ) == LINE_FEED)) - return -1; - break; + transient int minlength; + transient Op operations = null; + transient int numberOfClosures; + transient Context context = null; + transient RangeToken firstChar = null; - case 'z': - if (offset != con.limit) return -1; - break; + transient String fixedString = null; + transient int fixedStringOptions; + transient BMPattern fixedStringTable = null; + transient boolean fixedStringOnly = false; - case 'b': - if (con.length == 0) return -1; - { - int after = getWordType(target, con.start, con.limit, offset, opts); - if (after == WT_IGNORE) return -1; - int before = getPreviousWordType(target, con.start, con.limit, offset, opts); - if (after == before) return -1; - } - break; + static abstract class ExpressionTarget { + abstract char charAt(int index); + abstract boolean regionMatches(boolean ignoreCase, int offset, int limit, String part, int partlen); + abstract boolean regionMatches(boolean ignoreCase, int offset, int limit, int offset2, int partlen); + } + + static final class StringTarget extends ExpressionTarget { + + private String target; + + StringTarget(String target) { + this.target = target; + } + + final void resetTarget(String target) { + this.target = target; + } + + final char charAt(int index) { + return target.charAt(index); + } + + final boolean regionMatches(boolean ignoreCase, int offset, int limit, + String part, int partlen) { + if (limit-offset < partlen) { + return false; + } + return (ignoreCase) ? target.regionMatches(true, offset, part, 0, partlen) : target.regionMatches(offset, part, 0, partlen); + } - case 'B': - if (con.length == 0) - go = true; - else { - int after = getWordType(target, con.start, con.limit, offset, opts); - go = after == WT_IGNORE - || after == getPreviousWordType(target, con.start, con.limit, offset, opts); - } - if (!go) return -1; - break; + final boolean regionMatches(boolean ignoreCase, int offset, int limit, + int offset2, int partlen) { + if (limit-offset < partlen) { + return false; + } + return (ignoreCase) ? target.regionMatches(true, offset, target, offset2, partlen) + : target.regionMatches(offset, target, offset2, partlen); + } + } + + static final class CharArrayTarget extends ExpressionTarget { + + char[] target; + + CharArrayTarget(char[] target) { + this.target = target; + } - case '<': - if (con.length == 0 || offset == con.limit) return -1; - if (getWordType(target, con.start, con.limit, offset, opts) != WT_LETTER - || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_OTHER) - return -1; - break; + final void resetTarget(char[] target) { + this.target = target; + } - case '>': - if (con.length == 0 || offset == con.start) return -1; - if (getWordType(target, con.start, con.limit, offset, opts) != WT_OTHER - || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_LETTER) - return -1; - break; - } // switch anchor type - op = op.next; - break; + char charAt(int index) { + return target[index]; + } + + final boolean regionMatches(boolean ignoreCase, int offset, int limit, + String part, int partlen) { + if (offset < 0 || limit-offset < partlen) { + return false; + } + return (ignoreCase) ? regionMatchesIgnoreCase(offset, limit, part, partlen) + : regionMatches(offset, limit, part, partlen); + } - case Op.BACKREFERENCE: - { - int refno = op.getData(); - if (refno <= 0 || refno >= this.nofparen) - throw new RuntimeException("Internal Error: Reference number must be more than zero: "+refno); - if (con.match.getBeginning(refno) < 0 - || con.match.getEnd(refno) < 0) - return -1; // ******** - int o2 = con.match.getBeginning(refno); - int literallen = con.match.getEnd(refno)-o2; - if (!isSet(opts, IGNORE_CASE)) { - if (dx > 0) { - if (!regionMatches(target, offset, con.limit, o2, literallen)) - return -1; - offset += literallen; - } else { - if (!regionMatches(target, offset-literallen, con.limit, o2, literallen)) - return -1; - offset -= literallen; - } - } else { - if (dx > 0) { - if (!regionMatchesIgnoreCase(target, offset, con.limit, o2, literallen)) - return -1; - offset += literallen; - } else { - if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit, - o2, literallen)) - return -1; - offset -= literallen; - } - } - } - op = op.next; - break; - case Op.STRING: - { - String literal = op.getString(); - int literallen = literal.length(); - if (!isSet(opts, IGNORE_CASE)) { - if (dx > 0) { - if (!regionMatches(target, offset, con.limit, literal, literallen)) - return -1; - offset += literallen; - } else { - if (!regionMatches(target, offset-literallen, con.limit, literal, literallen)) - return -1; - offset -= literallen; - } - } else { - if (dx > 0) { - if (!regionMatchesIgnoreCase(target, offset, con.limit, literal, literallen)) - return -1; - offset += literallen; - } else { - if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit, - literal, literallen)) - return -1; - offset -= literallen; - } - } + private final boolean regionMatches(int offset, int limit, String part, int partlen) { + int i = 0; + while (partlen-- > 0) { + if (target[offset++] != part.charAt(i++)) { + return false; } - op = op.next; - break; + } + return true; + } - case Op.CLOSURE: - { - /* - * Saves current position to avoid - * zero-width repeats. - */ - int id = op.getData(); - if (id >= 0) { - int previousOffset = con.offsets[id]; - if (previousOffset < 0 || previousOffset != offset) { - con.offsets[id] = offset; - } else { - con.offsets[id] = -1; - op = op.next; - break; - } - } - - int ret = this. matchCharacterIterator (con, op.getChild(), offset, dx, opts); - if (id >= 0) con.offsets[id] = -1; - if (ret >= 0) return ret; - op = op.next; + private final boolean regionMatchesIgnoreCase(int offset, int limit, String part, int partlen) { + int i = 0; + while (partlen-- > 0) { + final char ch1 = target[offset++] ; + final char ch2 = part.charAt(i++); + if (ch1 == ch2) { + continue; } - break; - - case Op.QUESTION: - { - int ret = this. matchCharacterIterator (con, op.getChild(), offset, dx, opts); - if (ret >= 0) return ret; - op = op.next; + final char uch1 = Character.toUpperCase(ch1); + final char uch2 = Character.toUpperCase(ch2); + if (uch1 == uch2) { + continue; } - break; - - case Op.NONGREEDYCLOSURE: - case Op.NONGREEDYQUESTION: - { - int ret = this. matchCharacterIterator (con, op.next, offset, dx, opts); - if (ret >= 0) return ret; - op = op.getChild(); + if (Character.toLowerCase(uch1) != Character.toLowerCase(uch2)) { + return false; } - break; + } + return true; + } - case Op.UNION: - for (int i = 0; i < op.size(); i ++) { - int ret = this. matchCharacterIterator (con, op.elementAt(i), offset, dx, opts); - if (DEBUG) { - System.err.println("UNION: "+i+", ret="+ret); - } - if (ret >= 0) return ret; - } - return -1; - - case Op.CAPTURE: - int refno = op.getData(); - if (con.match != null && refno > 0) { - int save = con.match.getBeginning(refno); - con.match.setBeginning(refno, offset); - int ret = this. matchCharacterIterator (con, op.next, offset, dx, opts); - if (ret < 0) con.match.setBeginning(refno, save); - return ret; - } else if (con.match != null && refno < 0) { - int index = -refno; - int save = con.match.getEnd(index); - con.match.setEnd(index, offset); - int ret = this. matchCharacterIterator (con, op.next, offset, dx, opts); - if (ret < 0) con.match.setEnd(index, save); - return ret; - } - op = op.next; - break; + final boolean regionMatches(boolean ignoreCase, int offset, int limit, int offset2, int partlen) { + if (offset < 0 || limit-offset < partlen) { + return false; + } + return (ignoreCase) ? regionMatchesIgnoreCase(offset, limit, offset2, partlen) + : regionMatches(offset, limit, offset2, partlen); + } - case Op.LOOKAHEAD: - if (0 > this. matchCharacterIterator (con, op.getChild(), offset, 1, opts)) return -1; - op = op.next; - break; - case Op.NEGATIVELOOKAHEAD: - if (0 <= this. matchCharacterIterator (con, op.getChild(), offset, 1, opts)) return -1; - op = op.next; - break; - case Op.LOOKBEHIND: - if (0 > this. matchCharacterIterator (con, op.getChild(), offset, -1, opts)) return -1; - op = op.next; - break; - case Op.NEGATIVELOOKBEHIND: - if (0 <= this. matchCharacterIterator (con, op.getChild(), offset, -1, opts)) return -1; - op = op.next; - break; + private final boolean regionMatches(int offset, int limit, int offset2, int partlen) { + int i = offset2; + while (partlen-- > 0) { + if ( target [ offset++ ] != target [ i++ ] ) + return false; + } + return true; + } - case Op.INDEPENDENT: - { - int ret = this. matchCharacterIterator (con, op.getChild(), offset, dx, opts); - if (ret < 0) return ret; - offset = ret; - op = op.next; + private final boolean regionMatchesIgnoreCase(int offset, int limit, int offset2, int partlen) { + int i = offset2; + while (partlen-- > 0) { + final char ch1 = target[offset++] ; + final char ch2 = target[i++] ; + if (ch1 == ch2) { + continue; } - break; - - case Op.MODIFIER: - { - int localopts = opts; - localopts |= op.getData(); - localopts &= ~op.getData2(); - //System.err.println("MODIFIER: "+Integer.toString(opts, 16)+" -> "+Integer.toString(localopts, 16)); - int ret = this. matchCharacterIterator (con, op.getChild(), offset, dx, localopts); - if (ret < 0) return ret; - offset = ret; - op = op.next; + final char uch1 = Character.toUpperCase(ch1); + final char uch2 = Character.toUpperCase(ch2); + if (uch1 == uch2) { + continue; } - break; - - case Op.CONDITION: - { - Op.ConditionOp cop = (Op.ConditionOp)op; - boolean matchp = false; - if (cop.refNumber > 0) { - if (cop.refNumber >= this.nofparen) - throw new RuntimeException("Internal Error: Reference number must be more than zero: "+cop.refNumber); - matchp = con.match.getBeginning(cop.refNumber) >= 0 - && con.match.getEnd(cop.refNumber) >= 0; - } else { - matchp = 0 <= this. matchCharacterIterator (con, cop.condition, offset, dx, opts); - } - - if (matchp) { - op = cop.yes; - } else if (cop.no != null) { - op = cop.no; - } else { - op = cop.next; - } + if (Character.toLowerCase(uch1) != Character.toLowerCase(uch2)) { + return false; } - break; - - default: - throw new RuntimeException("Unknown operation type: "+op.type); - } // switch (op.type) - } // while - } - - private static final int getPreviousWordType(CharacterIterator target, int begin, int end, - int offset, int opts) { - int ret = getWordType(target, begin, end, --offset, opts); - while (ret == WT_IGNORE) - ret = getWordType(target, begin, end, --offset, opts); - return ret; + } + return true; + } } - private static final int getWordType(CharacterIterator target, int begin, int end, - int offset, int opts) { - if (offset < begin || offset >= end) return WT_OTHER; - return getWordType0( target .setIndex( offset ) , opts); - } + static final class CharacterIteratorTarget extends ExpressionTarget { + CharacterIterator target; + + CharacterIteratorTarget(CharacterIterator target) { + this.target = target; + } + final void resetTarget(CharacterIterator target) { + this.target = target; + } + final char charAt(int index) { + return target.setIndex(index); + } - private static final boolean regionMatches(CharacterIterator target, int offset, int limit, - String part, int partlen) { - if (offset < 0) return false; - if (limit-offset < partlen) - return false; - int i = 0; - while (partlen-- > 0) { - if ( target .setIndex( offset++ ) != part.charAt(i++)) + final boolean regionMatches(boolean ignoreCase, int offset, int limit, + String part, int partlen) { + if (offset < 0 || limit-offset < partlen) { return false; + } + return (ignoreCase) ? regionMatchesIgnoreCase(offset, limit, part, partlen) + : regionMatches(offset, limit, part, partlen); + } + + private final boolean regionMatches(int offset, int limit, String part, int partlen) { + int i = 0; + while (partlen-- > 0) { + if (target.setIndex(offset++) != part.charAt(i++)) { + return false; + } + } + return true; + } + + private final boolean regionMatchesIgnoreCase(int offset, int limit, String part, int partlen) { + int i = 0; + while (partlen-- > 0) { + final char ch1 = target.setIndex(offset++) ; + final char ch2 = part.charAt(i++); + if (ch1 == ch2) { + continue; + } + final char uch1 = Character.toUpperCase(ch1); + final char uch2 = Character.toUpperCase(ch2); + if (uch1 == uch2) { + continue; + } + if (Character.toLowerCase(uch1) != Character.toLowerCase(uch2)) { + return false; + } + } + return true; } - return true; - } - private static final boolean regionMatches(CharacterIterator target, int offset, int limit, - int offset2, int partlen) { - if (offset < 0) return false; - if (limit-offset < partlen) - return false; - int i = offset2; - while (partlen-- > 0) { - if ( target .setIndex( offset++ ) != target .setIndex( i++ ) ) + final boolean regionMatches(boolean ignoreCase, int offset, int limit, int offset2, int partlen) { + if (offset < 0 || limit-offset < partlen) { return false; + } + return (ignoreCase) ? regionMatchesIgnoreCase(offset, limit, offset2, partlen) + : regionMatches(offset, limit, offset2, partlen); } - return true; - } - /** - * @see java.lang.String#regionMatches - */ - private static final boolean regionMatchesIgnoreCase(CharacterIterator target, int offset, int limit, - String part, int partlen) { - if (offset < 0) return false; - if (limit-offset < partlen) - return false; - int i = 0; - while (partlen-- > 0) { - char ch1 = target .setIndex( offset++ ) ; - char ch2 = part.charAt(i++); - if (ch1 == ch2) - continue; - char uch1 = Character.toUpperCase(ch1); - char uch2 = Character.toUpperCase(ch2); - if (uch1 == uch2) - continue; - if (Character.toLowerCase(uch1) != Character.toLowerCase(uch2)) - return false; + private final boolean regionMatches(int offset, int limit, int offset2, int partlen) { + int i = offset2; + while (partlen-- > 0) { + if (target.setIndex(offset++) != target.setIndex(i++)) { + return false; + } + } + return true; } - return true; - } - private static final boolean regionMatchesIgnoreCase(CharacterIterator target, int offset, int limit, - int offset2, int partlen) { - if (offset < 0) return false; - if (limit-offset < partlen) - return false; - int i = offset2; - while (partlen-- > 0) { - char ch1 = target .setIndex( offset++ ) ; - char ch2 = target .setIndex( i++ ) ; - if (ch1 == ch2) - continue; - char uch1 = Character.toUpperCase(ch1); - char uch2 = Character.toUpperCase(ch2); - if (uch1 == uch2) - continue; - if (Character.toLowerCase(uch1) != Character.toLowerCase(uch2)) - return false; + private final boolean regionMatchesIgnoreCase(int offset, int limit, int offset2, int partlen) { + int i = offset2; + while (partlen-- > 0) { + final char ch1 = target.setIndex(offset++) ; + final char ch2 = target.setIndex(i++) ; + if (ch1 == ch2) { + continue; + } + final char uch1 = Character.toUpperCase(ch1); + final char uch2 = Character.toUpperCase(ch2); + if (uch1 == uch2) { + continue; + } + if (Character.toLowerCase(uch1) != Character.toLowerCase(uch2)) { + return false; + } + } + return true; } - return true; } - - - - // ================================================================ - - /** - * A regular expression. - * @serial - */ - String regex; - /** - * @serial - */ - int options; - - /** - * The number of parenthesis in the regular expression. - * @serial - */ - int nofparen; - /** - * Internal representation of the regular expression. - * @serial - */ - Token tokentree; - - boolean hasBackReferences = false; - - transient int minlength; - transient Op operations = null; - transient int numberOfClosures; - transient Context context = null; - transient RangeToken firstChar = null; - - transient String fixedString = null; - transient int fixedStringOptions; - transient BMPattern fixedStringTable = null; - transient boolean fixedStringOnly = false; - - static final class Context { - CharacterIterator ciTarget; - String strTarget; - char[] charTarget; int start; int limit; int length; Match match; boolean inuse = false; int[] offsets; + + private StringTarget stringTarget; + private CharArrayTarget charArrayTarget; + private CharacterIteratorTarget characterIteratorTarget; + + ExpressionTarget target; Context() { } private void resetCommon(int nofclosures) { this.length = this.limit-this.start; - this.inuse = true; + setInUse(true); this.match = null; if (this.offsets == null || this.offsets.length != nofclosures) this.offsets = new int[nofclosures]; for (int i = 0; i < nofclosures; i ++) this.offsets[i] = -1; } + void reset(CharacterIterator target, int start, int limit, int nofclosures) { - this.ciTarget = target; + if (characterIteratorTarget == null) { + characterIteratorTarget = new CharacterIteratorTarget(target); + } + else { + characterIteratorTarget.resetTarget(target); + } + this.target = characterIteratorTarget; this.start = start; this.limit = limit; this.resetCommon(nofclosures); } + void reset(String target, int start, int limit, int nofclosures) { - this.strTarget = target; + if (stringTarget == null) { + stringTarget = new StringTarget(target); + } + else { + stringTarget.resetTarget(target); + } + this.target = stringTarget; this.start = start; this.limit = limit; this.resetCommon(nofclosures); } + void reset(char[] target, int start, int limit, int nofclosures) { - this.charTarget = target; + if (charArrayTarget == null) { + charArrayTarget = new CharArrayTarget(target); + } + else { + charArrayTarget.resetTarget(target); + } + this.target = charArrayTarget; this.start = start; this.limit = limit; this.resetCommon(nofclosures); } + synchronized void setInUse(boolean inUse) { + this.inuse = inUse; + } } /** @@ -2967,7 +2201,7 @@ public class RegularExpression implements java.io.Serializable { * @exception org.apache.xerces.utils.regex.ParseException <VAR>regex</VAR> is not conforming to the syntax. */ public RegularExpression(String regex) throws ParseException { - this.setPattern(regex, null); + this(regex, null); } /** @@ -2980,6 +2214,17 @@ public class RegularExpression implements java.io.Serializable { public RegularExpression(String regex, String options) throws ParseException { this.setPattern(regex, options); } + + /** + * Creates a new RegularExpression instance with options. + * + * @param regex A regular expression + * @param options A String consisted of "i" "m" "s" "u" "w" "," "X" + * @exception org.apache.xerces.utils.regex.ParseException <VAR>regex</VAR> is not conforming to the syntax. + */ + public RegularExpression(String regex, String options, Locale locale) throws ParseException { + this.setPattern(regex, options, locale); + } RegularExpression(String regex, Token tok, int parens, boolean hasBackReferences, int options) { this.regex = regex; @@ -2993,14 +2238,18 @@ public class RegularExpression implements java.io.Serializable { * */ public void setPattern(String newPattern) throws ParseException { - this.setPattern(newPattern, this.options); + this.setPattern(newPattern, Locale.getDefault()); + } + + public void setPattern(String newPattern, Locale locale) throws ParseException { + this.setPattern(newPattern, this.options, locale); } - private void setPattern(String newPattern, int options) throws ParseException { + private void setPattern(String newPattern, int options, Locale locale) throws ParseException { this.regex = newPattern; this.options = options; RegexParser rp = RegularExpression.isSet(this.options, RegularExpression.XMLSCHEMA_MODE) - ? new ParserForXMLSchema() : new RegexParser(); + ? new ParserForXMLSchema(locale) : new RegexParser(locale); this.tokentree = rp.parse(this.regex, this.options); this.nofparen = rp.parennumber; this.hasBackReferences = rp.hasBackReferences; @@ -3012,7 +2261,11 @@ public class RegularExpression implements java.io.Serializable { * */ public void setPattern(String newPattern, String options) throws ParseException { - this.setPattern(newPattern, REUtil.parseOptions(options)); + this.setPattern(newPattern, options, Locale.getDefault()); + } + + public void setPattern(String newPattern, String options, Locale locale) throws ParseException { + this.setPattern(newPattern, REUtil.parseOptions(options), locale); } /**
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor