In JavaScript, is there an equivalent of String.indexOf() that takes a regular expression instead of a string for the first parameter while still allowing a second parameter?
I need to do something like
str.indexOf(/[abc]/ , i);
and
str.lastIndexOf(/[abc]/ , i);
While String.search() takes a regexp as a parameter it does not allow me to specify a second argument!
Edit:
This turned out to be harder than I originally thought so I wrote a small test function to test all the provided solutions... it assumes regexIndexOf and regexLastIndexOf have been added to the String object.
/**
 * Cross-checks str.regexIndexOf / str.regexLastIndexOf against the native
 * indexOf / lastIndexOf when searching for the single character 'a'.
 * Every start position from str.length + 1 down to 0 is tried, and each
 * mismatch is reported through alert().
 */
function test (str) {
    for (var start = str.length + 1; start >= 0; start--) {
        var forwardDiffers = str.indexOf('a', start) != str.regexIndexOf(/a/, start);
        if (forwardDiffers) {
            alert(['failed regexIndexOf ', str, start, str.indexOf('a', start), str.regexIndexOf(/a/, start)]);
        }

        var backwardDiffers = str.lastIndexOf('a', start) != str.regexLastIndexOf(/a/, start);
        if (backwardDiffers) {
            alert(['failed regexLastIndexOf ', str, start, str.lastIndexOf('a', start), str.regexLastIndexOf(/a/, start)]);
        }
    }
}
and I am testing as follows to make sure that, at least for a one-character regexp, the result is the same as if we had used indexOf:
// Look for the 'a' among the 'x'es: run the cross-check on three-character
// strings with no 'a', one 'a' in each position, two 'a's, and all 'a's.
test('xxx');
test('axx');
test('xax');
test('xxa');
test('axa');
test('xaa');
test('aax');
test('aaa');
This question is related to
javascript
regex
indexof
There are still no native methods that perform the requested task.
Here is the code that I am using. It mimics the behaviour of String.prototype.indexOf and String.prototype.lastIndexOf methods but they also accept a RegExp as the search argument in addition to a string representing the value to search for.
Yes it is quite long as an answer goes as it tries to follow current standards as close as possible and of course contains a reasonable amount of JSDOC comments. However, once minified, the code is only 2.27k and once gzipped for transmission it is only 1023 bytes.
The 2 methods that this adds to String.prototype
(using Object.defineProperty where available) are:
searchOf
searchLastOf
It passes all the tests that the OP posted and additionally I have tested the routines quite thoroughly in my daily usage, and have attempted to be sure that they work across multiple environments, but feedback/issues are always welcome.
/*jslint maxlen:80, browser:true */

/*
* Properties used by searchOf and searchLastOf implementation.
*/

/*property
MAX_SAFE_INTEGER, abs, add, apply, call, configurable, defineProperty,
enumerable, exec, floor, global, hasOwnProperty, ignoreCase, index,
lastIndex, lastIndexOf, length, max, min, multiline, pow, prototype,
remove, replace, searchLastOf, searchOf, source, toString, value, writable
*/

/*
* Properties used in the testing of searchOf and searchLastOf implementation.
*/

/*property
appendChild, createTextNode, getElementById, indexOf, lastIndexOf, length,
searchLastOf, searchOf, unshift
*/

(function () {
    'use strict';

    var pToString = Object.prototype.toString,
        // Captures the flag letters that follow the closing slash of a
        // stringified RegExp, e.g. "gi" from "/abc/gi".
        getNativeFlags = new RegExp('\\/([a-z]*)$', 'i'),
        // The "[object RegExp]" tag, taken from a real RegExp instance.
        stringTagRegExp = pToString.call(getNativeFlags);

    /**
     * Defines a property on an object with Object.defineProperty where it
     * exists, otherwise by plain assignment of descriptor.value.
     *
     * @private
     * @param {Object} object
     * @param {string} property
     * @param {Object} descriptor
     * @returns {Object} The object that was passed in.
     */
    function $defineProperty(object, property, descriptor) {
        if (Object.defineProperty) {
            Object.defineProperty(object, property, descriptor);
        } else {
            object[property] = descriptor.value;
        }

        return object;
    }

    /**
     * Returns true if inputArg is a RegExp.
     *
     * Only the [[Class]] / toString tag is checked. Since ES2015 the
     * `global`, `ignoreCase`, `multiline` and `source` properties are
     * accessors on RegExp.prototype rather than own properties, so an
     * own-property test would reject every regular expression.
     *
     * @private
     * @param {*} inputArg
     * @returns {boolean}
     */
    function $isRegExp(inputArg) {
        return inputArg !== null &&
            typeof inputArg !== 'undefined' &&
            pToString.call(inputArg) === stringTagRegExp;
    }

    /**
     * Converts inputArg to a string, refusing null, undefined and symbols.
     *
     * @private
     * @param {*} inputArg
     * @throws {TypeError} If inputArg is null, undefined or a symbol.
     * @returns {string}
     */
    function $onlyCoercibleToString(inputArg) {
        if (inputArg === null || typeof inputArg === 'undefined') {
            throw new TypeError('Cannot convert argument to object: ' +
                inputArg);
        }

        if (typeof inputArg === 'symbol') {
            throw new TypeError('Cannot convert symbol to string');
        }

        return String(inputArg);
    }

    /**
     * Converts inputArg to an integer by truncating towards zero.
     *
     * @private
     * @param {*} inputArg
     * @returns {number} 0 for NaN; zero and +/-Infinity are returned as is.
     */
    function $toInteger(inputArg) {
        var number = +inputArg;

        if (number !== number) {
            return 0;
        }

        if (!number || number === Infinity || number === -Infinity) {
            return number;
        }

        return (number > 0 || -1) * Math.floor(Math.abs(number));
    }

    /**
     * Copies a regex so that it can be driven through lastIndex: the global
     * flag is always set and the sticky flag is removed; every other native
     * flag is kept. The caller's regex is never modified.
     *
     * @private
     * @param {RegExp} regex Regex to copy.
     * @returns {RegExp}
     */
    function $toSearchRegExp(regex) {
        var flags = getNativeFlags.exec(String(regex))[1];

        return new RegExp(regex.source, flags.replace(/[gy]/g, '') + 'g');
    }

    if (!String.prototype.searchOf) {
        /**
         * Returns the index within the calling string of the first match of
         * the specified value at or after fromIndex, or -1 if there is none.
         *
         * @function
         * @this {string}
         * @param {RegExp|string} regex A regular expression object or a
         *                              string. Anything that is not a RegExp
         *                              is handed to String.prototype.indexOf.
         * @param {Number} [fromIndex] Position to start searching from.
         *                             Defaults to 0; negative or NaN values
         *                             are treated as 0. If it is greater
         *                             than the string length, -1 is returned.
         * @throws {TypeError} If `this` is null, undefined or a symbol.
         * @returns {Number} The index of the first match, otherwise -1.
         */
        $defineProperty(String.prototype, 'searchOf', {
            enumerable: false,
            configurable: true,
            writable: true,
            value: function (regex) {
                var str = $onlyCoercibleToString(this),
                    fromIndex = 0,
                    match,
                    rx;

                if (!$isRegExp(regex)) {
                    return String.prototype.indexOf.apply(str, arguments);
                }

                if (arguments.length > 1) {
                    fromIndex = Math.max($toInteger(arguments[1]), 0);
                }

                // fromIndex === length is still searched so that patterns
                // able to match the empty string (e.g. /$/) are found there.
                if (fromIndex > str.length) {
                    return -1;
                }

                rx = $toSearchRegExp(regex);
                rx.lastIndex = fromIndex;
                match = rx.exec(str);

                return match ? match.index : -1;
            }
        });
    }

    if (!String.prototype.searchLastOf) {
        /**
         * Returns the index within the calling string of the last match of
         * the specified value that starts at or before fromIndex, or -1 if
         * there is none.
         *
         * @function
         * @this {string}
         * @param {RegExp|string} regex A regular expression object or a
         *                              string. Anything that is not a RegExp
         *                              is handed to
         *                              String.prototype.lastIndexOf.
         * @param {Number} [fromIndex] Highest index at which a match may
         *                             start. Defaults to the string length
         *                             (also used when the value converts to
         *                             NaN). Negative values are treated as 0
         *                             and values above the string length as
         *                             the string length, as lastIndexOf does.
         * @throws {TypeError} If `this` is null, undefined or a symbol.
         * @returns {Number} The index of the last match, otherwise -1.
         */
        $defineProperty(String.prototype, 'searchLastOf', {
            enumerable: false,
            configurable: true,
            writable: true,
            value: function (regex) {
                var str = $onlyCoercibleToString(this),
                    length = str.length,
                    result = -1,
                    fromIndex = length,
                    numPos,
                    match,
                    pos,
                    rx;

                if (!$isRegExp(regex)) {
                    return String.prototype.lastIndexOf.apply(str, arguments);
                }

                if (arguments.length > 1) {
                    numPos = +arguments[1];
                    // NaN (including an explicit undefined) keeps the
                    // default of searching the whole string.
                    if (numPos === numPos) {
                        fromIndex = Math.min(
                            Math.max($toInteger(numPos), 0),
                            length
                        );
                    }
                }

                // Walk the matches from the left, restarting one position
                // after each match start, and remember the last one that
                // does not begin beyond fromIndex.
                rx = $toSearchRegExp(regex);
                pos = 0;
                while (pos <= fromIndex) {
                    rx.lastIndex = pos;
                    match = rx.exec(str);
                    if (!match || match.index > fromIndex) {
                        break;
                    }

                    result = match.index;
                    pos = result + 1;
                }

                return result;
            }
        });
    }
}());
_x000D_
(function () {
    'use strict';

    /*
     * Checks that, at least for a one-character regexp, searchOf and
     * searchLastOf give the same result as indexOf and lastIndexOf.
     * One "Passed"/"Failed" line per comparison is appended to the element
     * with id "out".
     */

    var pre = document.getElementById('out');

    // Appends one line of text to the output element.
    function log(result) {
        pre.appendChild(document.createTextNode(result + '\n'));
    }

    // Compares the regex methods with the native ones for every start
    // index from str.length + 2 down to and including 0.
    function test(str) {
        var i,
            r,
            a,
            b;

        for (i = str.length + 2; i >= 0; i -= 1) {
            a = str.indexOf('a', i);
            b = str.searchOf(/a/, i);
            r = ['Failed', 'searchOf', str, i, a, b];
            if (a === b) {
                r[0] = 'Passed';
            }

            log(r);
            a = str.lastIndexOf('a', i);
            b = str.searchLastOf(/a/, i);
            r = ['Failed', 'searchLastOf', str, i, a, b];
            if (a === b) {
                r[0] = 'Passed';
            }

            log(r);
        }
    }

    /*
     * Look for the a among the xes
     */

    test('xxx');
    test('axx');
    test('xax');
    test('xxa');
    test('axa');
    test('xaa');
    test('aax');
    test('aaa');
}());
_x000D_
<pre id="out"></pre>
_x000D_
Instances of the String
constructor have a .search()
method which accepts a RegExp and returns the index of the first match.
To start the search from a particular position (faking the second parameter of .indexOf()
) you can slice
off the first i
characters:
str.slice(i).search(/re/)
But this will get the index in the shorter string (after the first part was sliced off) so you'll want to then add the length of the chopped off part (i
) to the returned index if it wasn't -1
. This will give you the index in the original string:
/**
 * Index of the first match of `re` in `text` at or after position `i`,
 * or -1 when there is none.
 *
 * The search runs on the suffix text.slice(i), so anchors and look-behind
 * in `re` see the suffix, not the full string.
 *
 * @param {string} text String to search.
 * @param {RegExp} re Pattern to look for.
 * @param {number} [i] Start position; missing, NaN or negative values are
 *     treated as 0 and values past the end as text.length.
 * @returns {number} Index in `text`, or -1.
 */
function regexIndexOf(text, re, i) {
    // Normalize the start the way String.prototype.indexOf does, so that a
    // missing index does not yield NaN and a negative one does not make
    // slice() count from the end.
    var start = Math.min(Math.max(Math.floor(Number(i)) || 0, 0), text.length);
    var indexInSuffix = text.slice(start).search(re);
    return indexInSuffix < 0 ? indexInSuffix : indexInSuffix + start;
}
After having all the proposed solutions fail my tests one way or the other, (edit: some were updated to pass the tests after I wrote this) I found the mozilla implementation for Array.indexOf and Array.lastIndexOf
I used those to implement my version of String.prototype.regexIndexOf and String.prototype.regexLastIndexOf as follows:
/**
 * Index of the first single character, at or after `from`, that matches
 * the regular expression `elt`; -1 if there is none.
 *
 * The pattern is tested against one character at a time, so it only makes
 * sense for one-character patterns such as [abc], \s, \W or \D.
 *
 * @param {RegExp} elt Pattern tested against each character.
 * @param {number} [from] Start index (second argument). Missing, NaN or
 *     negative values are treated as 0, as String.prototype.indexOf does.
 * @returns {number}
 */
String.prototype.regexIndexOf = function(elt /*, from*/)
{
    var arr = this.split('');
    var len = arr.length;
    var from = Number(arguments[1]) || 0;
    from = (from < 0) ? 0 : Math.floor(from);
    for (; from < len; from++) {
        // A global or sticky regex remembers lastIndex between exec() calls;
        // reset it so every character is tested from its start.
        elt.lastIndex = 0;
        if (elt.exec(arr[from])) {
            elt.lastIndex = 0;
            return from;
        }
    }
    return -1;
};
/**
 * Index of the last single character, at or before `from`, that matches
 * the regular expression `elt`; -1 if there is none.
 *
 * The pattern is tested against one character at a time, so it only makes
 * sense for one-character patterns such as [abc], \s, \W or \D.
 *
 * @param {RegExp} elt Pattern tested against each character.
 * @param {number} [from] Highest index to examine (second argument).
 *     Missing or NaN means the end of the string; negative values are
 *     treated as 0, as String.prototype.lastIndexOf does.
 * @returns {number}
 */
String.prototype.regexLastIndexOf = function(elt /*, from*/)
{
    var arr = this.split('');
    var len = arr.length;
    var from = Number(arguments[1]);
    if (isNaN(from) || from >= len) {
        from = len - 1;
    } else {
        from = (from < 0) ? 0 : Math.floor(from);
    }
    for (; from > -1; from--) {
        // A global or sticky regex remembers lastIndex between exec() calls;
        // reset it so every character is tested from its start.
        elt.lastIndex = 0;
        if (elt.exec(arr[from])) {
            elt.lastIndex = 0;
            return from;
        }
    }
    return -1;
};
They seem to pass the test functions I provided in the question.
Obviously they only work if the regular expression matches one character but that is enough for my purpose since I will be using it for things like ( [abc] , \s , \W , \D )
I will keep monitoring the question in case someone provides a better/faster/cleaner/more generic implementation that works on any regular expression.
It does not natively, but you certainly can add this functionality
<script type="text/javascript">
/**
 * Index of the first match of `pattern` at or after `startIndex`, or -1.
 *
 * The search runs on the suffix starting at `startIndex`, so anchors in
 * `pattern` see the suffix rather than the whole string.
 *
 * @param {RegExp} pattern Pattern to look for.
 * @param {number} [startIndex] Start position; missing, NaN or negative
 *     values are treated as 0 and values past the end as the length.
 * @returns {number}
 */
String.prototype.regexIndexOf = function( pattern, startIndex )
{
    var text = String( this );
    // Clamp like indexOf does; a negative value handed to substr()/slice()
    // would otherwise count from the end of the string.
    var start = Math.min( Math.max( Math.floor( Number( startIndex ) ) || 0, 0 ), text.length );
    var searchResult = text.slice( start ).search( pattern );
    return ( -1 === searchResult ) ? -1 : searchResult + start;
};
/**
 * Index of the last match of `pattern` that starts at or before
 * `startIndex`, or -1 when there is none.
 *
 * The string is scanned left to right with a private global copy of the
 * pattern, so multi-character patterns work and the caller's regex is not
 * modified.
 *
 * @param {RegExp} pattern Pattern to look for.
 * @param {number} [startIndex] Highest index at which a match may start.
 *     Missing or NaN means the end of the string; negative values are
 *     treated as 0, as String.prototype.lastIndexOf does.
 * @returns {number}
 */
String.prototype.regexLastIndexOf = function( pattern, startIndex )
{
    var text = String( this );
    var limit = Number( startIndex );
    // Keep the pattern's own flags, force "g" and drop sticky "y" so that
    // lastIndex can be used to step through the string.
    var flags = /[a-z]*$/i.exec( String( pattern ) )[0].replace( /[gy]/g, '' ) + 'g';
    var scanner = new RegExp( pattern.source, flags );
    var lastFound = -1;
    var match;

    limit = isNaN( limit ) ? text.length : Math.min( Math.max( Math.floor( limit ), 0 ), text.length );
    while ( ( match = scanner.exec( text ) ) !== null && match.index <= limit ) {
        lastFound = match.index;
        // Restart one position after the match start so that overlapping
        // and empty matches are also visited and the loop always advances.
        scanner.lastIndex = match.index + 1;
    }
    return lastFound;
};
/**
 * Returns the string with its characters in reverse order.
 *
 * Surrogate pairs are kept together, so characters outside the Basic
 * Multilingual Plane (e.g. emoji) survive the reversal.
 *
 * @returns {string}
 */
String.prototype.reverse = function()
{
    // Split into surrogate pairs first and single UTF-16 units otherwise;
    // match() returns null for the empty string.
    var units = String( this ).match( /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\s\S]/g );
    return units ? units.reverse().join('') : '';
};
// Demo: shows, in one alert, the regex-based lookups next to the native
// lastIndexOf calls for the same character and start index.
// Indexes 0123456789
var str = 'caabbccdda';
alert( [
str.regexIndexOf( /[cd]/, 4 )
, str.regexLastIndexOf( /[cd]/, 4 )
, str.regexIndexOf( /[yz]/, 4 )
, str.regexLastIndexOf( /[yz]/, 4 )
, str.lastIndexOf( 'd', 4 )
, str.regexLastIndexOf( /d/, 4 )
, str.lastIndexOf( 'd' )
, str.regexLastIndexOf( /d/ )
]
);
</script>
I didn't fully test these methods, but they seem to work so far.
Based on BaileyP's answer. The main difference is that these methods return -1
if the pattern can't be matched.
Edit: Thanks to Jason Bunting's answer I got an idea. Why not modify the .lastIndex
property of the regex? Though this will only work for patterns with the global flag (/g
).
Edit: Updated to pass the test-cases.
/**
 * Index of the first match of `re` at or after `startPos`, or -1.
 *
 * The search is driven through lastIndex on a private global copy of the
 * pattern, so anchors and look-behind still see the whole string and the
 * caller's regex (including its lastIndex) is left untouched.
 *
 * @param {RegExp} re Pattern to look for.
 * @param {number} [startPos] Start position; missing, NaN or negative
 *     values are treated as 0.
 * @returns {number}
 */
String.prototype.regexIndexOf = function(re, startPos) {
    var text = String(this);
    var start = Math.max(Math.floor(Number(startPos)) || 0, 0);
    if (start > text.length) return -1;
    // Keep every flag of the original (u, s, i, m, ...), force "g" and drop
    // sticky "y", which would pin the match to exactly `start`.
    var flags = /[a-z]*$/i.exec(String(re))[0].replace(/[gy]/g, "") + "g";
    var scanner = new RegExp(re.source, flags);
    scanner.lastIndex = start;
    var match = scanner.exec(text);
    return match ? match.index : -1;
};
/**
 * Index of the last match of `re` that starts at or before `startPos`,
 * or -1 when there is none.
 *
 * A private global copy of the pattern is stepped through the string, so
 * the caller's regex (including its lastIndex) is left untouched.
 *
 * @param {RegExp} re Pattern to look for.
 * @param {number} [startPos] Highest index at which a match may start.
 *     Missing or NaN means the end of the string; negative values are
 *     treated as 0, as String.prototype.lastIndexOf does.
 * @returns {number}
 */
String.prototype.regexLastIndexOf = function(re, startPos) {
    var text = String(this);
    var limit = Number(startPos);
    // Keep every flag of the original, force "g" and drop sticky "y".
    var flags = /[a-z]*$/i.exec(String(re))[0].replace(/[gy]/g, "") + "g";
    var scanner = new RegExp(re.source, flags);
    var lastSuccess = -1;
    var match;

    limit = isNaN(limit) ? text.length : Math.min(Math.max(Math.floor(limit), 0), text.length);
    while ((match = scanner.exec(text)) !== null && match.index <= limit) {
        lastSuccess = match.index;
        // Restart one position after the match start so overlapping and
        // empty matches are visited and the loop always advances.
        scanner.lastIndex = match.index + 1;
    }
    return lastSuccess;
};
I needed a regexIndexOf
function for an array as well, so I programmed one myself. I doubt that it's optimized, but I guess it should work properly.
/**
 * Index of the first array element, at or after `startpos`, whose string
 * form matches `regex`; -1 if there is none. Elements that are undefined
 * (including holes) are skipped.
 *
 * @param {RegExp} regex Pattern tested against '' + element.
 * @param {number} [startpos=0] Index to start from.
 * @returns {number}
 */
Array.prototype.regexIndexOf = function (regex, startpos = 0) {
    // Declared locally: the undeclared `len` / `x` would leak into the
    // global scope, or throw a ReferenceError in strict mode and modules.
    const len = this.length;
    for (let x = startpos; x < len; x++) {
        if (typeof this[x] != 'undefined' && ('' + this[x]).match(regex)) {
            return x;
        }
    }
    return -1;
};
// Demo data: a mix of null, NaN, a hole at index 2, numbers and strings.
// `arr` is declared explicitly; assigning to an undeclared name creates an
// implicit global and throws in strict mode.
const arr = [];
arr.push(null);
arr.push(NaN);
arr[3] = 7;
arr.push('asdf');
arr.push('qwer');
arr.push(9);
arr.push('...');
console.log(arr);
// Search for the first element containing a digit, starting at index 4.
arr.regexIndexOf(/\d/, 4);
I have a short version for you. It works well for me!
// All case-insensitive occurrences of a, b or c in str.
// match() returns null when nothing matches, so guard before indexing.
var match = str.match(/[abc]/gi);
var firstIndex = match ? str.indexOf(match[0]) : -1;
var lastIndex = match ? str.lastIndexOf(match[match.length-1]) : -1;
And if you want a prototype version:
/**
 * Index of the first match of `regex` in the string, or -1.
 *
 * Uses search(), which reports the position of the match itself. Looking
 * up the matched text with indexOf() can return an earlier occurrence of
 * the same text that the pattern did not match (e.g. /\ba/ in "ba a").
 *
 * @param {RegExp} regex Pattern to look for.
 * @returns {number}
 */
String.prototype.indexOfRegex = function(regex){
    return String(this).search(regex);
};
/**
 * Index of the last match of `regex` in the string, or -1.
 *
 * A private global copy of the pattern is stepped through the string and
 * the start of the last match is returned. This works whether or not the
 * caller's regex is global and does not depend on the matched text being
 * unique in the string.
 *
 * @param {RegExp} regex Pattern to look for.
 * @returns {number}
 */
String.prototype.lastIndexOfRegex = function(regex){
    var text = String(this);
    // Keep the pattern's flags, force "g" and drop sticky "y".
    var flags = /[a-z]*$/i.exec(String(regex))[0].replace(/[gy]/g, '') + 'g';
    var scanner = new RegExp(regex.source, flags);
    var last = -1;
    var match;
    while ((match = scanner.exec(text)) !== null) {
        last = match.index;
        // Advance by one from the match start so the loop always terminates.
        scanner.lastIndex = match.index + 1;
    }
    return last;
};
EDIT : if you want to add support for fromIndex
/**
 * Index of the first match of `regex` at or after `fromIndex`, or -1.
 *
 * The search runs on the suffix starting at `fromIndex`, so anchors in
 * `regex` see the suffix rather than the whole string.
 *
 * @param {RegExp} regex Pattern to look for.
 * @param {number} [fromIndex] Start position; missing, NaN or negative
 *     values are treated as 0 and values past the end as the length.
 * @returns {number}
 */
String.prototype.indexOfRegex = function(regex, fromIndex){
    var text = String(this);
    // Normalize first: adding an undefined fromIndex to the result would
    // otherwise produce NaN.
    var start = Math.min(Math.max(Math.floor(Number(fromIndex)) || 0, 0), text.length);
    var found = text.slice(start).search(regex);
    return found < 0 ? -1 : found + start;
};
/**
 * Index of the last match of `regex` that starts at or before `fromIndex`,
 * or -1 when there is none.
 *
 * @param {RegExp} regex Pattern to look for.
 * @param {number} [fromIndex] Highest index at which a match may start.
 *     Missing or NaN means the end of the string; negative values are
 *     treated as 0, as String.prototype.lastIndexOf does.
 * @returns {number}
 */
String.prototype.lastIndexOfRegex = function(regex, fromIndex){
    var text = String(this);
    var limit = Number(fromIndex);
    // Keep the pattern's flags, force "g" and drop sticky "y".
    var flags = /[a-z]*$/i.exec(String(regex))[0].replace(/[gy]/g, '') + 'g';
    var scanner = new RegExp(regex.source, flags);
    var last = -1;
    var match;

    limit = isNaN(limit) ? text.length : Math.min(Math.max(Math.floor(limit), 0), text.length);
    while ((match = scanner.exec(text)) !== null && match.index <= limit) {
        last = match.index;
        // Advance by one from the match start so the loop always terminates.
        scanner.lastIndex = match.index + 1;
    }
    return last;
};
To use it, as simple as this:
// Usage: look up a, b or c (case-insensitive) in str with the two
// prototype methods; both are called without a start index.
var firstIndex = str.indexOfRegex(/[abc]/gi);
var lastIndex = str.lastIndexOfRegex(/[abc]/gi);
If you are looking for a very simple lastIndex lookup with RegExp and don't care if it mimics lastIndexOf to the last detail, this may catch your attention.
I simply reverse the string and subtract the first occurrence index from length - 1. It happens to pass my test, but I think a performance issue could arise with long strings.
// Declaration merging: adds the two custom methods to the built-in String
// interface so that assigning them on String.prototype type-checks.
interface String {
// Takes no arguments and returns a string.
reverse(): string;
// Takes a RegExp and returns a number.
lastIndex(regex: RegExp): number;
}
// Returns the string reversed: split into UTF-16 code units, reverse the
// array, join it back. Because the split is per code unit, surrogate pairs
// (characters outside the BMP) are not kept together.
String.prototype.reverse = function(this: string) {
return this.split("")
.reverse()
.join("");
};
/**
 * Index at which the last match of `regex` starts, or -1 if there is none.
 *
 * The pattern is applied to the string as written (not to a reversed
 * copy), so multi-character and direction-sensitive patterns behave as
 * expected. A private global copy of the regex is used, so a caller's
 * /g regex is neither advanced nor does its lastIndex affect the result.
 */
String.prototype.lastIndex = function(this: string, regex: RegExp) {
  // Keep the pattern's flags, force "g" and drop sticky "y".
  const flags = regex.flags.replace(/[gy]/g, "") + "g";
  const scanner = new RegExp(regex.source, flags);
  let last = -1;
  let match: RegExpExecArray | null;
  while ((match = scanner.exec(this)) !== null) {
    last = match.index;
    // Advance by one from the match start so the loop always terminates.
    scanner.lastIndex = match.index + 1;
  }
  return last;
};
After having all the proposed solutions fail my tests one way or the other, (edit: some were updated to pass the tests after I wrote this) I found the mozilla implementation for Array.indexOf and Array.lastIndexOf
I used those to implement my version of String.prototype.regexIndexOf and String.prototype.regexLastIndexOf as follows:
/**
 * Index of the first single character, at or after `from`, that matches
 * the regular expression `elt`; -1 if there is none.
 *
 * The pattern is tested against one character at a time, so it only makes
 * sense for one-character patterns such as [abc], \s, \W or \D.
 *
 * @param {RegExp} elt Pattern tested against each character.
 * @param {number} [from] Start index (second argument). Missing, NaN or
 *     negative values are treated as 0, as String.prototype.indexOf does.
 * @returns {number}
 */
String.prototype.regexIndexOf = function(elt /*, from*/)
{
    var arr = this.split('');
    var len = arr.length;
    var from = Number(arguments[1]) || 0;
    from = (from < 0) ? 0 : Math.floor(from);
    for (; from < len; from++) {
        // A global or sticky regex remembers lastIndex between exec() calls;
        // reset it so every character is tested from its start.
        elt.lastIndex = 0;
        if (elt.exec(arr[from])) {
            elt.lastIndex = 0;
            return from;
        }
    }
    return -1;
};
/**
 * Index of the last single character, at or before `from`, that matches
 * the regular expression `elt`; -1 if there is none.
 *
 * The pattern is tested against one character at a time, so it only makes
 * sense for one-character patterns such as [abc], \s, \W or \D.
 *
 * @param {RegExp} elt Pattern tested against each character.
 * @param {number} [from] Highest index to examine (second argument).
 *     Missing or NaN means the end of the string; negative values are
 *     treated as 0, as String.prototype.lastIndexOf does.
 * @returns {number}
 */
String.prototype.regexLastIndexOf = function(elt /*, from*/)
{
    var arr = this.split('');
    var len = arr.length;
    var from = Number(arguments[1]);
    if (isNaN(from) || from >= len) {
        from = len - 1;
    } else {
        from = (from < 0) ? 0 : Math.floor(from);
    }
    for (; from > -1; from--) {
        // A global or sticky regex remembers lastIndex between exec() calls;
        // reset it so every character is tested from its start.
        elt.lastIndex = 0;
        if (elt.exec(arr[from])) {
            elt.lastIndex = 0;
            return from;
        }
    }
    return -1;
};
They seem to pass the test functions I provided in the question.
Obviously they only work if the regular expression matches one character but that is enough for my purpose since I will be using it for things like ( [abc] , \s , \W , \D )
I will keep monitoring the question in case someone provides a better/faster/cleaner/more generic implementation that works on any regular expression.
I have a short version for you. It works well for me!
// All case-insensitive occurrences of a, b or c in str.
// match() returns null when nothing matches, so guard before indexing.
var match = str.match(/[abc]/gi);
var firstIndex = match ? str.indexOf(match[0]) : -1;
var lastIndex = match ? str.lastIndexOf(match[match.length-1]) : -1;
And if you want a prototype version:
/**
 * Index of the first match of `regex` in the string, or -1.
 *
 * Uses search(), which reports the position of the match itself. Looking
 * up the matched text with indexOf() can return an earlier occurrence of
 * the same text that the pattern did not match (e.g. /\ba/ in "ba a").
 *
 * @param {RegExp} regex Pattern to look for.
 * @returns {number}
 */
String.prototype.indexOfRegex = function(regex){
    return String(this).search(regex);
};
/**
 * Index of the last match of `regex` in the string, or -1.
 *
 * A private global copy of the pattern is stepped through the string and
 * the start of the last match is returned. This works whether or not the
 * caller's regex is global and does not depend on the matched text being
 * unique in the string.
 *
 * @param {RegExp} regex Pattern to look for.
 * @returns {number}
 */
String.prototype.lastIndexOfRegex = function(regex){
    var text = String(this);
    // Keep the pattern's flags, force "g" and drop sticky "y".
    var flags = /[a-z]*$/i.exec(String(regex))[0].replace(/[gy]/g, '') + 'g';
    var scanner = new RegExp(regex.source, flags);
    var last = -1;
    var match;
    while ((match = scanner.exec(text)) !== null) {
        last = match.index;
        // Advance by one from the match start so the loop always terminates.
        scanner.lastIndex = match.index + 1;
    }
    return last;
};
EDIT : if you want to add support for fromIndex
/**
 * Index of the first match of `regex` at or after `fromIndex`, or -1.
 *
 * The search runs on the suffix starting at `fromIndex`, so anchors in
 * `regex` see the suffix rather than the whole string.
 *
 * @param {RegExp} regex Pattern to look for.
 * @param {number} [fromIndex] Start position; missing, NaN or negative
 *     values are treated as 0 and values past the end as the length.
 * @returns {number}
 */
String.prototype.indexOfRegex = function(regex, fromIndex){
    var text = String(this);
    // Normalize first: adding an undefined fromIndex to the result would
    // otherwise produce NaN.
    var start = Math.min(Math.max(Math.floor(Number(fromIndex)) || 0, 0), text.length);
    var found = text.slice(start).search(regex);
    return found < 0 ? -1 : found + start;
};
/**
 * Index of the last match of `regex` that starts at or before `fromIndex`,
 * or -1 when there is none.
 *
 * @param {RegExp} regex Pattern to look for.
 * @param {number} [fromIndex] Highest index at which a match may start.
 *     Missing or NaN means the end of the string; negative values are
 *     treated as 0, as String.prototype.lastIndexOf does.
 * @returns {number}
 */
String.prototype.lastIndexOfRegex = function(regex, fromIndex){
    var text = String(this);
    var limit = Number(fromIndex);
    // Keep the pattern's flags, force "g" and drop sticky "y".
    var flags = /[a-z]*$/i.exec(String(regex))[0].replace(/[gy]/g, '') + 'g';
    var scanner = new RegExp(regex.source, flags);
    var last = -1;
    var match;

    limit = isNaN(limit) ? text.length : Math.min(Math.max(Math.floor(limit), 0), text.length);
    while ((match = scanner.exec(text)) !== null && match.index <= limit) {
        last = match.index;
        // Advance by one from the match start so the loop always terminates.
        scanner.lastIndex = match.index + 1;
    }
    return last;
};
To use it, as simple as this:
// Usage: look up a, b or c (case-insensitive) in str with the two
// prototype methods; both are called without a start index.
var firstIndex = str.indexOfRegex(/[abc]/gi);
var lastIndex = str.lastIndexOfRegex(/[abc]/gi);
In certain simple cases, you can simplify your backwards search by using split.
/**
 * Backwards search built on split(): the string is cut at every match of
 * `re`, and the position where the final piece begins (i.e. just after the
 * last match) is returned. Returns null when `re` does not split the
 * string at all.
 */
function regexlast(string,re){
    var pieces = string.split(re);
    if (pieces.length <= 1) {
        return null;
    }
    var tail = pieces[pieces.length - 1];
    return string.length - tail.length;
}
This has a few serious problems:
But on the bright side it's way less code. For a constant-length regex that can't overlap (like /\s\w/
for finding word boundaries) this is good enough.
I used String.prototype.match(regex)
which returns a string array of all found matches of the given regex
in the string (more info see here):
/**
 * Index of the last match of `regex` in `text` that starts at or before
 * `limit`, or -1 when there is none.
 *
 * Matches are visited left to right with a private global copy of the
 * pattern, so the position of each match is taken from the regex engine
 * instead of being re-derived with lastIndexOf() on the matched text
 * (which always lands on the final occurrence of that text).
 *
 * @param {string} text String to search.
 * @param {RegExp} regex Pattern to look for (global or not).
 * @param {number} [limit=text.length] Highest index at which a match may
 *     start.
 * @returns {number}
 */
function getLastIndex(text, regex, limit = text.length) {
  // Keep the pattern's flags, force "g" and drop sticky "y".
  const flags = regex.flags.replace(/[gy]/g, '') + 'g';
  const scanner = new RegExp(regex.source, flags);
  let lastFound = -1;
  let match;

  while ((match = scanner.exec(text)) !== null && match.index <= limit) {
    lastFound = match.index;
    // Advance by one from the match start so the loop always terminates.
    scanner.lastIndex = match.index + 1;
  }
  return lastFound;
}

// expect -1: the first match is at index 14, which is beyond the limit
console.log(getLastIndex('First Sentence. Last Sentence. Unfinished', /\. /g, 10));

// expect 29
console.log(getLastIndex('First Sentence. Last Sentence. Unfinished', /\. /g));
_x000D_
For data with sparse matches, using string.search is the fastest across browsers. It re-slices a string each iteration to :
/**
 * Index of the last match of `regex` in `string`, found by repeatedly
 * calling search() on what remains after each match; -1 if none.
 *
 * Because the string is re-sliced after every match, anchors and
 * look-behind in `regex` see the remaining suffix, not the full string.
 *
 * @param {string} string String to search.
 * @param {RegExp} regex Pattern to look for.
 * @param {number} [index] When given, only string.slice(0, index) is
 *     searched (negative values give an empty prefix).
 * @returns {number}
 */
function lastIndexOfSearch(string, regex, index) {
    if(index === 0 || index)
        string = string.slice(0, Math.max(0,index));
    var idx;
    var offset = -1;
    while ((idx = string.search(regex)) !== -1) {
        offset += idx + 1;
        // A pattern that can match the empty string keeps matching at
        // position 0 of an exhausted remainder; stop once the match sits at
        // the very end instead of looping forever.
        if (idx >= string.length) break;
        string = string.slice(idx + 1);
    }
    return offset;
}
For dense data I made this. It's complex compared to the execute method, but for dense data, it's 2-10x faster than every other method I tried, and about 100x faster than the accepted solution. The main points are:
The new regex is executed and the results from either that exec, or the first exec, are returned;
/**
 * Index of the last match of `regex` in `string`, or -1.
 *
 * After a first exec() proves there is a match, a derived pattern
 * ".*(source)(?!.*?source)" is run from that first match so the engine
 * itself finds the last occurrence. Derived patterns are cached on the
 * function, keyed by source and flags.
 *
 * Side effect: regex.lastIndex is reset to 0 and the caller's regex is
 * then executed once.
 *
 * @param {string} string String to search.
 * @param {RegExp} regex Pattern to look for.
 * @param {number} [index] When given, only string.slice(0, index + 1) is
 *     searched.
 * @returns {number}
 */
function lastIndexOfGroupSimple(string, regex, index) {
    if (index === 0 || index) string = string.slice(0, Math.max(0, index + 1));
    regex.lastIndex = 0;
    // All locals are declared in one statement; a missing comma here would
    // turn flags/key/match into implicit globals (a ReferenceError in
    // strict mode).
    var flags = 'g' + (regex.multiline ? 'm' : '') + (regex.ignoreCase ? 'i' : ''),
        key = regex.source + '$' + flags,
        match = regex.exec(string),
        lastRegex,
        firstIndex;
    if (!match) return -1;
    if (lastIndexOfGroupSimple.cache === undefined) lastIndexOfGroupSimple.cache = {};
    lastRegex = lastIndexOfGroupSimple.cache[key];
    if (!lastRegex)
        lastIndexOfGroupSimple.cache[key] = lastRegex = new RegExp('.*(' + regex.source + ')(?!.*?' + regex.source + ')', flags);
    // Fall back to the first match if the derived pattern finds nothing.
    firstIndex = match.index;
    lastRegex.lastIndex = match.index;
    // lastIndex now points just past the captured last occurrence, so
    // subtracting its length gives the position where it starts.
    return (match = lastRegex.exec(string)) ? lastRegex.lastIndex - match[1].length : firstIndex;
}
I don't understand the purpose of the tests up top. Situations that require a regex are impossible to compare against a call to indexOf, which I think is the point of making the method in the first place. To get the test to pass, it makes more sense to use 'xxx+(?!x)', than adjust the way the regex iterates.
Based on BaileyP's answer. The main difference is that these methods return -1
if the pattern can't be matched.
Edit: Thanks to Jason Bunting's answer I got an idea. Why not modify the .lastIndex
property of the regex? Though this will only work for patterns with the global flag (/g
).
Edit: Updated to pass the test-cases.
/**
 * Index of the first match of `re` at or after `startPos`, or -1.
 *
 * The search is driven through lastIndex on a private global copy of the
 * pattern, so anchors and look-behind still see the whole string and the
 * caller's regex (including its lastIndex) is left untouched.
 *
 * @param {RegExp} re Pattern to look for.
 * @param {number} [startPos] Start position; missing, NaN or negative
 *     values are treated as 0.
 * @returns {number}
 */
String.prototype.regexIndexOf = function(re, startPos) {
    var text = String(this);
    var start = Math.max(Math.floor(Number(startPos)) || 0, 0);
    if (start > text.length) return -1;
    // Keep every flag of the original (u, s, i, m, ...), force "g" and drop
    // sticky "y", which would pin the match to exactly `start`.
    var flags = /[a-z]*$/i.exec(String(re))[0].replace(/[gy]/g, "") + "g";
    var scanner = new RegExp(re.source, flags);
    scanner.lastIndex = start;
    var match = scanner.exec(text);
    return match ? match.index : -1;
};
/**
 * Index of the last match of `re` that starts at or before `startPos`,
 * or -1 when there is none.
 *
 * A private global copy of the pattern is stepped through the string, so
 * the caller's regex (including its lastIndex) is left untouched.
 *
 * @param {RegExp} re Pattern to look for.
 * @param {number} [startPos] Highest index at which a match may start.
 *     Missing or NaN means the end of the string; negative values are
 *     treated as 0, as String.prototype.lastIndexOf does.
 * @returns {number}
 */
String.prototype.regexLastIndexOf = function(re, startPos) {
    var text = String(this);
    var limit = Number(startPos);
    // Keep every flag of the original, force "g" and drop sticky "y".
    var flags = /[a-z]*$/i.exec(String(re))[0].replace(/[gy]/g, "") + "g";
    var scanner = new RegExp(re.source, flags);
    var lastSuccess = -1;
    var match;

    limit = isNaN(limit) ? text.length : Math.min(Math.max(Math.floor(limit), 0), text.length);
    while ((match = scanner.exec(text)) !== null && match.index <= limit) {
        lastSuccess = match.index;
        // Restart one position after the match start so overlapping and
        // empty matches are visited and the loop always advances.
        scanner.lastIndex = match.index + 1;
    }
    return lastSuccess;
};
Instances of the String
constructor have a .search()
method which accepts a RegExp and returns the index of the first match.
To start the search from a particular position (faking the second parameter of .indexOf()
) you can slice
off the first i
characters:
str.slice(i).search(/re/)
But this will get the index in the shorter string (after the first part was sliced off) so you'll want to then add the length of the chopped off part (i
) to the returned index if it wasn't -1
. This will give you the index in the original string:
/**
 * Returns the index of the first match of `re` in `text` at or after `i`.
 *
 * @param {string} text Text to search.
 * @param {RegExp} re Pattern to search for.
 * @param {number} [i=0] Start position; missing/NaN values count as 0 and
 *     the value is clamped to [0, text.length].
 * @returns {number} Index in the original string, or -1 when not found.
 */
function regexIndexOf(text, re, i) {
  // Normalise the offset first: an undefined offset would otherwise be added
  // to the result (NaN), and a negative one would make slice() count from
  // the end of the string.
  var start = Math.floor(Number(i)) || 0;
  start = Math.min(Math.max(start, 0), text.length);
  var indexInSuffix = text.slice(start).search(re);
  return indexInSuffix < 0 ? indexInSuffix : indexInSuffix + start;
}
It does not natively, but you certainly can add this functionality
<script type="text/javascript">
/**
 * Regex-aware counterpart of String.prototype.indexOf.
 *
 * @param {RegExp|string} pattern Pattern handed to String.prototype.search.
 * @param {number} [startIndex=0] Start position, clamped to [0, length].
 * @returns {number} Index of the first match at or after startIndex, or -1.
 */
String.prototype.regexIndexOf = function( pattern, startIndex )
{
  var text = String( this );
  // Clamp so a negative start cannot be added to the result.
  var start = Math.min( Math.max( Math.floor( Number( startIndex ) ) || 0, 0 ), text.length );
  var searchResult = text.slice( start ).search( pattern );
  return ( -1 === searchResult ) ? -1 : searchResult + start;
};

/**
 * Regex-aware counterpart of String.prototype.lastIndexOf.
 *
 * Scans forward with a global copy of the pattern and remembers the last
 * match that starts at or before startIndex. The subject is not reversed,
 * so multi-character patterns keep their meaning.
 *
 * @param {RegExp|string} pattern Pattern to search for.
 * @param {number} [startIndex] Highest index at which a match may start;
 *     defaults to the string length, NaN counts as the default, negative
 *     values count as 0.
 * @returns {number} Index of the last such match, or -1.
 */
String.prototype.regexLastIndexOf = function( pattern, startIndex )
{
  var text = String( this );
  var limit = startIndex === undefined ? text.length : Number( startIndex );
  if ( limit !== limit ) limit = text.length; // NaN
  if ( limit < 0 ) limit = 0;

  var isRegExp = pattern instanceof RegExp;
  var source = isRegExp ? pattern.source : String( pattern );
  var flags = 'g';
  if ( isRegExp )
  {
    flags += ( pattern.ignoreCase ? 'i' : '' ) + ( pattern.multiline ? 'm' : '' ) +
      ( pattern.unicode ? 'u' : '' ) + ( pattern.dotAll ? 's' : '' );
  }
  var scanner = new RegExp( source, flags );

  var last = -1;
  var match;
  while ( ( match = scanner.exec( text ) ) !== null && match.index <= limit )
  {
    last = match.index;
    // Resume one past the match start so empty matches cannot stall the loop.
    scanner.lastIndex = match.index + 1;
  }
  return last;
};

/**
 * Returns the string with its UTF-16 code units in reverse order.
 *
 * @returns {string}
 */
String.prototype.reverse = function()
{
  return this.split('').reverse().join('');
};
// Demo: shows the regex helpers next to the native lastIndexOf calls so the
// results can be compared by eye in a single alert.
// Indexes 0123456789
var str = 'caabbccdda';
alert( [
str.regexIndexOf( /[cd]/, 4 ) // first c/d searching from index 4
, str.regexLastIndexOf( /[cd]/, 4 ) // last c/d with start index 4
, str.regexIndexOf( /[yz]/, 4 ) // characters that do not occur in str
, str.regexLastIndexOf( /[yz]/, 4 )
, str.lastIndexOf( 'd', 4 ) // native result ...
, str.regexLastIndexOf( /d/, 4 ) // ... next to the regex version
, str.lastIndexOf( 'd' ) // same comparison without a start index
, str.regexLastIndexOf( /d/ )
]
);
</script>
I didn't fully test these methods, but they seem to work so far.
Based on BaileyP's answer. The main difference is that these methods return -1
if the pattern can't be matched.
Edit: Thanks to Jason Bunting's answer I got an idea. Why not modify the .lastIndex
property of the regex? Though this will only work for patterns with the global flag (/g
).
Edit: Updated to pass the test-cases.
/**
 * Regex-aware counterpart of String.prototype.indexOf.
 *
 * @param {RegExp} re Pattern to search for.
 * @param {number} [startPos=0] Position at which the search starts.
 * @returns {number} Index of the first match at or after startPos, or -1.
 */
String.prototype.regexIndexOf = function (re, startPos) {
  // Always search with a private global copy: exec() only honours lastIndex
  // on global regexes, and using a copy leaves the caller's regex (and its
  // lastIndex) untouched.
  var flags = "g" + (re.multiline ? "m" : "") + (re.ignoreCase ? "i" : "") +
    (re.unicode ? "u" : "") + (re.dotAll ? "s" : "");
  var scanner = new RegExp(re.source, flags);
  scanner.lastIndex = startPos || 0;
  var match = scanner.exec(this);
  return match ? match.index : -1;
};
/**
 * Regex-aware counterpart of String.prototype.lastIndexOf.
 *
 * @param {RegExp} re Pattern to search for.
 * @param {number} [startPos] Highest index at which a match may start.
 *     Defaults to the string length; NaN is treated like the default and
 *     negative values like 0, as lastIndexOf does.
 * @returns {number} Index of the last match starting at or before startPos,
 *     or -1.
 */
String.prototype.regexLastIndexOf = function (re, startPos) {
  var text = String(this);
  var limit = startPos === undefined ? text.length : Number(startPos);
  if (limit !== limit) limit = text.length; // NaN
  if (limit < 0) limit = 0;
  // Scan with a private global copy so the caller's lastIndex is not changed.
  var flags = "g" + (re.multiline ? "m" : "") + (re.ignoreCase ? "i" : "") +
    (re.unicode ? "u" : "") + (re.dotAll ? "s" : "");
  var scanner = new RegExp(re.source, flags);
  var lastSuccess = -1;
  for (var pos = 0; pos <= limit; pos++) {
    scanner.lastIndex = pos;
    var match = scanner.exec(text);
    if (!match) break;
    // Jump to the match; the loop's pos++ then resumes one past its start,
    // so overlapping candidates are still considered.
    pos = match.index;
    if (pos <= limit) lastSuccess = pos;
  }
  return lastSuccess;
};
// Minimal existence check: exec() returns null when the pattern does not
// occur in the string, so a non-null result means "found".
var mystring = "abc ab a";
var re = new RegExp("ab"); // any regex here
if ( re.exec(mystring) != null ){
alert("matches"); // true in this case
}
Use standard regular expressions:
var re = new RegExp("^ab"); // At front
var re = new RegExp("ab$"); // At end
var re = new RegExp("ab(c|d)"); // abc or abd
Instances of the String
constructor have a .search()
method which accepts a RegExp and returns the index of the first match.
To start the search from a particular position (faking the second parameter of .indexOf()
) you can slice
off the first i
characters:
str.slice(i).search(/re/)
But this will get the index in the shorter string (after the first part was sliced off) so you'll want to then add the length of the chopped off part (i
) to the returned index if it wasn't -1
. This will give you the index in the original string:
/**
 * Returns the index of the first match of `re` in `text` at or after `i`.
 *
 * @param {string} text Text to search.
 * @param {RegExp} re Pattern to search for.
 * @param {number} [i=0] Start position; missing/NaN values count as 0 and
 *     the value is clamped to [0, text.length].
 * @returns {number} Index in the original string, or -1 when not found.
 */
function regexIndexOf(text, re, i) {
  // Normalise the offset first: an undefined offset would otherwise be added
  // to the result (NaN), and a negative one would make slice() count from
  // the end of the string.
  var start = Math.floor(Number(i)) || 0;
  start = Math.min(Math.max(start, 0), text.length);
  var indexInSuffix = text.slice(start).search(re);
  return indexInSuffix < 0 ? indexInSuffix : indexInSuffix + start;
}
RegExp instances already have a lastIndex property (which is honoured when they are global), so what I'm doing is copying the regular expression, modifying it slightly to suit our purposes, exec-ing it on the string and looking at the lastIndex. This will inevitably be faster than looping on the string. (You have enough examples of how to put this onto the string prototype, right?)
/**
 * Returns the index of the first match of `reIn` in `str` at or after
 * `startIndex`, or -1 when there is none.
 *
 * @param {RegExp} reIn Pattern to search for; it is copied, not modified.
 * @param {string} str Text to search.
 * @param {number} [startIndex=0] Position at which the search starts.
 * @returns {number}
 */
function reIndexOf(reIn, str, startIndex) {
  // The flag property is `multiline` (lower-case l); reading `multiLine`
  // yields undefined and silently drops the m flag from the copy.
  var re = new RegExp(reIn.source, 'g' + (reIn.ignoreCase ? 'i' : '') + (reIn.multiline ? 'm' : ''));
  re.lastIndex = startIndex || 0;
  var res = re.exec(str);
  if (!res) return -1;
  return res.index;
}
/**
 * Returns the index of the last match of `reIn` in `str`, or -1.
 *
 * The pattern is extended with a negative look-ahead that rejects any match
 * followed by another match. Note that `startIndex` is assigned to
 * lastIndex, i.e. it is where the forward scan begins, so the last match is
 * only reported when it starts at or after `startIndex`.
 *
 * @param {RegExp} reIn Pattern to search for; it is copied, not modified.
 * @param {string} str Text to search.
 * @param {number} [startIndex=0] Position at which the scan starts.
 * @returns {number}
 */
function reLastIndexOf(reIn, str, startIndex) {
  var source = reIn.source;
  // A pattern ending in an unescaped `$` is already pinned to the end of the
  // string, unless the m flag lets `$` match at every line end.
  var anchoredAtEnd = /\$$/.test(source) && !/\\\$$/.test(source) && !reIn.multiline;
  // Group the source on both sides so a top-level alternation (a|b) is not
  // split apart by the appended look-ahead.
  var src = anchoredAtEnd ? source : '(?:' + source + ')(?![\\S\\s]*(?:' + source + '))';
  var re = new RegExp(src, 'g' + (reIn.ignoreCase ? 'i' : '') + (reIn.multiline ? 'm' : ''));
  re.lastIndex = startIndex || 0;
  var res = re.exec(str);
  if (!res) return -1;
  return res.index;
}
reIndexOf(/[abc]/, "tommy can eat"); // Returns 6
reIndexOf(/[abc]/, "tommy can eat", 8); // Returns 11
reLastIndexOf(/[abc]/, "tommy can eat"); // Returns 11
You could also prototype the functions onto the RegExp object:
/**
 * Returns the index of the first match of this pattern in `str` at or after
 * `startIndex`, or -1 when there is none.
 *
 * @param {string} str Text to search.
 * @param {number} [startIndex=0] Position at which the search starts.
 * @returns {number}
 */
RegExp.prototype.indexOf = function(str, startIndex) {
  // `multiline` is the real flag property; `multiLine` is always undefined
  // and would drop the m flag from the copy.
  var re = new RegExp(this.source, 'g' + (this.ignoreCase ? 'i' : '') + (this.multiline ? 'm' : ''));
  re.lastIndex = startIndex || 0;
  var res = re.exec(str);
  if (!res) return -1;
  return res.index;
};
/**
 * Returns the index of the last match of this pattern in `str`, or -1.
 *
 * The pattern is extended with a negative look-ahead that rejects any match
 * followed by another match. `startIndex` is assigned to lastIndex, i.e. it
 * is where the forward scan begins, so the last match is only reported when
 * it starts at or after `startIndex`.
 *
 * @param {string} str Text to search.
 * @param {number} [startIndex=0] Position at which the scan starts.
 * @returns {number}
 */
RegExp.prototype.lastIndexOf = function(str, startIndex) {
  var source = this.source;
  // A pattern ending in an unescaped `$` is already pinned to the end of the
  // string, unless the m flag lets `$` match at every line end.
  var anchoredAtEnd = /\$$/.test(source) && !/\\\$$/.test(source) && !this.multiline;
  // Group the source on both sides so a top-level alternation (a|b) is not
  // split apart by the appended look-ahead.
  var src = anchoredAtEnd ? source : '(?:' + source + ')(?![\\S\\s]*(?:' + source + '))';
  var re = new RegExp(src, 'g' + (this.ignoreCase ? 'i' : '') + (this.multiline ? 'm' : ''));
  re.lastIndex = startIndex || 0;
  var res = re.exec(str);
  if (!res) return -1;
  return res.index;
};
/[abc]/.indexOf("tommy can eat"); // Returns 6
/[abc]/.indexOf("tommy can eat", 8); // Returns 11
/[abc]/.lastIndexOf("tommy can eat"); // Returns 11
A quick explanation of how I am modifying the RegExp: for indexOf I just have to ensure that the global flag is set. For lastIndexOf I am using a negative look-ahead to find the last occurrence, unless the RegExp was already anchored to the end of the string.
Well, as you are just looking to match the position of a character , regex is possibly overkill.
I presume all you want is, instead of "find the first occurrence of this character", just "find the first occurrence of any of these characters".
This of course is the simple answer, but does what your question sets out to do, albeit without the regex part ( because you didn't clarify why specifically it had to be a regex )
/**
 * Returns the smallest index at or after `offset` at which any of the given
 * strings occurs in `str`, or -1 when none of them occurs.
 *
 * @param {string} str Text to search.
 * @param {Array<string>} chars Candidate strings, each looked up with indexOf.
 * @param {number} [offset] Start position passed through to indexOf.
 * @returns {number}
 */
function mIndexOf( str , chars, offset )
{
  var first = -1;
  for( var i = 0; i < chars.length; i++ )
  {
    var p = str.indexOf( chars[i] , offset );
    // Ignore misses: -1 compares smaller than every real index and must not
    // overwrite a hit that was found for an earlier candidate.
    if( p !== -1 && ( first === -1 || p < first ) )
    {
      first = p;
    }
  }
  return first;
}
/**
 * Method form of mIndexOf: searches the receiver string for the first
 * occurrence of any of `chars`, starting at `offset`.
 *
 * @param {Array<string>} chars Candidate strings.
 * @param {number} [offset] Start position.
 * @returns {number} Result of mIndexOf for this string.
 */
String.prototype.mIndexOf = function( chars, offset )
{
  return mIndexOf( this, chars, offset ); // I'm really averse to monkey patching.
};
mIndexOf( "hello world", ['a','o','w'], 0 );
>> 4
mIndexOf( "hello world", ['a'], 0 );
>> -1
mIndexOf( "hello world", ['a','o','w'], 4 );
>> 4
mIndexOf( "hello world", ['a','o','w'], 5 );
>> 6
mIndexOf( "hello world", ['a','o','w'], 7 );
>> -1
mIndexOf( "hello world", ['a','o','w','d'], 7 );
>> 10
mIndexOf( "hello world", ['a','o','w','d'], 10 );
>> 10
mIndexOf( "hello world", ['a','o','w','d'], 11 );
>> -1
You could use substr.
str.substr(i).match(/[abc]/);
After having all the proposed solutions fail my tests one way or the other, (edit: some were updated to pass the tests after I wrote this) I found the mozilla implementation for Array.indexOf and Array.lastIndexOf
I used those to implement my version of String.prototype.regexIndexOf and String.prototype.regexLastIndexOf as follows:
/**
 * Returns the index of the first character at or after `from` that matches
 * `elt`, or -1. Each character is tested on its own, so only patterns that
 * match a single character are meaningful.
 *
 * @param {RegExp} elt Single-character pattern.
 * @param {number} [from=0] Start index; a negative value is counted from
 *     the end of the string.
 * @returns {number}
 */
String.prototype.regexIndexOf = function(elt /*, from*/)
{
  var str = String(this);
  var len = str.length;
  var from = Number(arguments[1]) || 0;
  from = (from < 0) ? Math.ceil(from) : Math.floor(from);
  if (from < 0)
    from += len;
  // Still negative (e.g. -Infinity): nothing exists before index 0.
  if (from < 0)
    from = 0;
  for (; from < len; from++) {
    // search() ignores and restores lastIndex, so a global regex gives the
    // same answer on every character and on every call.
    if (str.charAt(from).search(elt) !== -1)
      return from;
  }
  return -1;
};
/**
 * Returns the index of the last character at or before `from` that matches
 * `elt`, or -1. Each character is tested on its own, so only patterns that
 * match a single character are meaningful.
 *
 * @param {RegExp} elt Single-character pattern.
 * @param {number} [from] Highest index to test; defaults to the last
 *     character. A negative value is counted from the end of the string.
 * @returns {number}
 */
String.prototype.regexLastIndexOf = function(elt /*, from*/)
{
  var str = String(this);
  var len = str.length;
  var from = Number(arguments[1]);
  if (isNaN(from)) {
    from = len - 1;
  } else {
    from = (from < 0) ? Math.ceil(from) : Math.floor(from);
    if (from < 0)
      from += len;
    else if (from >= len)
      from = len - 1;
  }
  for (; from > -1; from--) {
    // search() ignores and restores lastIndex, so a global regex gives the
    // same answer on every character and on every call.
    if (str.charAt(from).search(elt) !== -1)
      return from;
  }
  return -1;
};
They seem to pass the test functions I provided in the question.
Obviously they only work if the regular expression matches one character but that is enough for my purpose since I will be using it for things like ( [abc] , \s , \W , \D )
I will keep monitoring the question in case someone provides a better/faster/cleaner/more generic implementation that works on any regular expression.
In certain simple cases, you can simplify your backwards search by using split.
/**
 * Splits `string` on `re` and returns the offset at which the final piece
 * begins, or null when the split produced a single piece.
 *
 * @param {string} string Text to split.
 * @param {RegExp} re Separator pattern.
 * @returns {?number}
 */
function regexlast(string, re) {
  var pieces = string.split(re);
  if (pieces.length <= 1) {
    return null;
  }
  var tail = pieces[pieces.length - 1];
  return string.length - tail.length;
}
This has a few serious problems:
But on the bright side it's way less code. For a constant-length regex that can't overlap (like /\s\w/
for finding word boundaries) this is good enough.
Well, as you are just looking to match the position of a character , regex is possibly overkill.
I presume all you want is, instead of "find the first occurrence of this character", just "find the first occurrence of any of these characters".
This of course is the simple answer, but does what your question sets out to do, albeit without the regex part ( because you didn't clarify why specifically it had to be a regex )
/**
 * Returns the smallest index at or after `offset` at which any of the given
 * strings occurs in `str`, or -1 when none of them occurs.
 *
 * @param {string} str Text to search.
 * @param {Array<string>} chars Candidate strings, each looked up with indexOf.
 * @param {number} [offset] Start position passed through to indexOf.
 * @returns {number}
 */
function mIndexOf( str , chars, offset )
{
  var first = -1;
  for( var i = 0; i < chars.length; i++ )
  {
    var p = str.indexOf( chars[i] , offset );
    // Ignore misses: -1 compares smaller than every real index and must not
    // overwrite a hit that was found for an earlier candidate.
    if( p !== -1 && ( first === -1 || p < first ) )
    {
      first = p;
    }
  }
  return first;
}
/**
 * Method form of mIndexOf: searches the receiver string for the first
 * occurrence of any of `chars`, starting at `offset`.
 *
 * @param {Array<string>} chars Candidate strings.
 * @param {number} [offset] Start position.
 * @returns {number} Result of mIndexOf for this string.
 */
String.prototype.mIndexOf = function( chars, offset )
{
  return mIndexOf( this, chars, offset ); // I'm really averse to monkey patching.
};
mIndexOf( "hello world", ['a','o','w'], 0 );
>> 4
mIndexOf( "hello world", ['a'], 0 );
>> -1
mIndexOf( "hello world", ['a','o','w'], 4 );
>> 4
mIndexOf( "hello world", ['a','o','w'], 5 );
>> 6
mIndexOf( "hello world", ['a','o','w'], 7 );
>> -1
mIndexOf( "hello world", ['a','o','w','d'], 7 );
>> 10
mIndexOf( "hello world", ['a','o','w','d'], 10 );
>> 10
mIndexOf( "hello world", ['a','o','w','d'], 11 );
>> -1
It does not natively, but you certainly can add this functionality
<script type="text/javascript">
/**
 * Regex-aware counterpart of String.prototype.indexOf.
 *
 * @param {RegExp|string} pattern Pattern handed to String.prototype.search.
 * @param {number} [startIndex=0] Start position, clamped to [0, length].
 * @returns {number} Index of the first match at or after startIndex, or -1.
 */
String.prototype.regexIndexOf = function( pattern, startIndex )
{
  var text = String( this );
  // Clamp so a negative start cannot be added to the result.
  var start = Math.min( Math.max( Math.floor( Number( startIndex ) ) || 0, 0 ), text.length );
  var searchResult = text.slice( start ).search( pattern );
  return ( -1 === searchResult ) ? -1 : searchResult + start;
};

/**
 * Regex-aware counterpart of String.prototype.lastIndexOf.
 *
 * Scans forward with a global copy of the pattern and remembers the last
 * match that starts at or before startIndex. The subject is not reversed,
 * so multi-character patterns keep their meaning.
 *
 * @param {RegExp|string} pattern Pattern to search for.
 * @param {number} [startIndex] Highest index at which a match may start;
 *     defaults to the string length, NaN counts as the default, negative
 *     values count as 0.
 * @returns {number} Index of the last such match, or -1.
 */
String.prototype.regexLastIndexOf = function( pattern, startIndex )
{
  var text = String( this );
  var limit = startIndex === undefined ? text.length : Number( startIndex );
  if ( limit !== limit ) limit = text.length; // NaN
  if ( limit < 0 ) limit = 0;

  var isRegExp = pattern instanceof RegExp;
  var source = isRegExp ? pattern.source : String( pattern );
  var flags = 'g';
  if ( isRegExp )
  {
    flags += ( pattern.ignoreCase ? 'i' : '' ) + ( pattern.multiline ? 'm' : '' ) +
      ( pattern.unicode ? 'u' : '' ) + ( pattern.dotAll ? 's' : '' );
  }
  var scanner = new RegExp( source, flags );

  var last = -1;
  var match;
  while ( ( match = scanner.exec( text ) ) !== null && match.index <= limit )
  {
    last = match.index;
    // Resume one past the match start so empty matches cannot stall the loop.
    scanner.lastIndex = match.index + 1;
  }
  return last;
};

/**
 * Returns the string with its UTF-16 code units in reverse order.
 *
 * @returns {string}
 */
String.prototype.reverse = function()
{
  return this.split('').reverse().join('');
};
// Demo: shows the regex helpers next to the native lastIndexOf calls so the
// results can be compared by eye in a single alert.
// Indexes 0123456789
var str = 'caabbccdda';
alert( [
str.regexIndexOf( /[cd]/, 4 ) // first c/d searching from index 4
, str.regexLastIndexOf( /[cd]/, 4 ) // last c/d with start index 4
, str.regexIndexOf( /[yz]/, 4 ) // characters that do not occur in str
, str.regexLastIndexOf( /[yz]/, 4 )
, str.lastIndexOf( 'd', 4 ) // native result ...
, str.regexLastIndexOf( /d/, 4 ) // ... next to the regex version
, str.lastIndexOf( 'd' ) // same comparison without a start index
, str.regexLastIndexOf( /d/ )
]
);
</script>
I didn't fully test these methods, but they seem to work so far.
You could use substr.
str.substr(i).match(/[abc]/);
There are still no native methods that perform the requested task.
Here is the code that I am using. It mimics the behaviour of String.prototype.indexOf and String.prototype.lastIndexOf methods but they also accept a RegExp as the search argument in addition to a string representing the value to search for.
Yes it is quite long as an answer goes as it tries to follow current standards as close as possible and of course contains a reasonable amount of JSDOC comments. However, once minified, the code is only 2.27k and once gzipped for transmission it is only 1023 bytes.
The 2 methods that this adds to String.prototype
(using Object.defineProperty where available) are:
searchOf
searchLastOf
It passes all the tests that the OP posted and additionally I have tested the routines quite thoroughly in my daily usage, and have attempted to be sure that they work across multiple environments, but feedback/issues are always welcome.
/*jslint maxlen:80, browser:true */

/*
* Properties used by searchOf and searchLastOf implementation.
*/

/*property
MAX_SAFE_INTEGER, abs, add, apply, call, configurable, defineProperty,
enumerable, exec, floor, global, hasOwnProperty, ignoreCase, index,
lastIndex, lastIndexOf, length, max, min, multiline, pow, prototype,
remove, replace, searchLastOf, searchOf, source, toString, value, writable
*/

/*
* Properties used in the testing of searchOf and searchLastOf implementation.
*/

/*property
appendChild, createTextNode, getElementById, indexOf, lastIndexOf, length,
searchLastOf, searchOf, unshift
*/

(function () {
  'use strict';

  var MAX_SAFE_INTEGER = Number.MAX_SAFE_INTEGER || Math.pow(2, 53) - 1,
    getNativeFlags = new RegExp('\\/([a-z]*)$', 'i'),
    clipDups = new RegExp('([\\s\\S])(?=[\\s\\S]*\\1)', 'g'),
    pToString = Object.prototype.toString,
    stringTagRegExp;

  /**
   * Defines a new property directly on an object, or modifies an existing
   * property on an object, and returns the object. Falls back to plain
   * assignment of descriptor.value where Object.defineProperty is missing.
   *
   * @private
   * @function
   * @param {Object} object
   * @param {string} property
   * @param {Object} descriptor
   * @returns {Object}
   */
  function $defineProperty(object, property, descriptor) {
    if (Object.defineProperty) {
      Object.defineProperty(object, property, descriptor);
    } else {
      object[property] = descriptor.value;
    }

    return object;
  }

  /**
   * Returns true if the operands are strictly equal with no type conversion.
   *
   * @private
   * @function
   * @param {*} a
   * @param {*} b
   * @returns {boolean}
   */
  function $strictEqual(a, b) {
    return a === b;
  }

  /**
   * Returns true if the operand inputArg is undefined.
   *
   * @private
   * @function
   * @param {*} inputArg
   * @returns {boolean}
   */
  function $isUndefined(inputArg) {
    return $strictEqual(typeof inputArg, 'undefined');
  }

  /**
   * Provides a string representation of the supplied object in the form
   * "[object type]", where type is the object type.
   *
   * @private
   * @function
   * @param {*} inputArg The object for which a class string representation
   *                     is required.
   * @returns {string} A string value of the form "[object type]".
   */
  function $toStringTag(inputArg) {
    var val;
    if (inputArg === null) {
      val = '[object Null]';
    } else if ($isUndefined(inputArg)) {
      val = '[object Undefined]';
    } else {
      val = pToString.call(inputArg);
    }

    return val;
  }

  /**
   * The string tag representation of a RegExp object.
   *
   * @private
   * @type {string}
   */
  stringTagRegExp = $toStringTag(getNativeFlags);

  /**
   * Returns true if the operand inputArg is a RegExp.
   *
   * The flag and source properties are only type-checked, not required to
   * be own properties: since ES2015 they are accessors on RegExp.prototype,
   * so an own-property test rejects every regular expression.
   *
   * @private
   * @function
   * @param {*} inputArg
   * @returns {boolean}
   */
  function $isRegExp(inputArg) {
    return $toStringTag(inputArg) === stringTagRegExp &&
      typeof inputArg.ignoreCase === 'boolean' &&
      typeof inputArg.global === 'boolean' &&
      typeof inputArg.multiline === 'boolean' &&
      typeof inputArg.source === 'string';
  }

  /**
   * The abstract operation throws an error if its argument is a value that
   * cannot be converted to an Object, otherwise returns the argument.
   *
   * @private
   * @function
   * @param {*} inputArg The object to be tested.
   * @throws {TypeError} If inputArg is null or undefined.
   * @returns {*} The inputArg if coercible.
   */
  function $requireObjectCoercible(inputArg) {
    var errStr;

    if (inputArg === null || $isUndefined(inputArg)) {
      errStr = 'Cannot convert argument to object: ' + inputArg;
      throw new TypeError(errStr);
    }

    return inputArg;
  }

  /**
   * The abstract operation converts its argument to a value of type string.
   *
   * @private
   * @function
   * @param {*} inputArg
   * @throws {TypeError} If inputArg is a symbol.
   * @returns {string}
   */
  function $toString(inputArg) {
    var type,
      val;

    if (inputArg === null) {
      val = 'null';
    } else {
      type = typeof inputArg;
      if (type === 'string') {
        val = inputArg;
      } else if (type === 'undefined') {
        val = type;
      } else {
        if (type === 'symbol') {
          throw new TypeError('Cannot convert symbol to string');
        }

        val = String(inputArg);
      }
    }

    return val;
  }

  /**
   * Returns a string only if the argument is coercible, otherwise throws an
   * error.
   *
   * @private
   * @function
   * @param {*} inputArg
   * @throws {TypeError} If inputArg is null or undefined.
   * @returns {string}
   */
  function $onlyCoercibleToString(inputArg) {
    return $toString($requireObjectCoercible(inputArg));
  }

  /**
   * The function evaluates the passed value and converts it to an integer.
   *
   * @private
   * @function
   * @param {*} inputArg The object to be converted to an integer.
   * @returns {number} 0 for values that convert to NaN; zero and the
   *                   infinities are returned as-is; other numbers are
   *                   truncated towards zero.
   */
  function $toInteger(inputArg) {
    var number = +inputArg,
      val = 0;

    // NaN is the only value that is not strictly equal to itself.
    if ($strictEqual(number, number)) {
      if (!number || number === Infinity || number === -Infinity) {
        val = number;
      } else {
        val = (number > 0 || -1) * Math.floor(Math.abs(number));
      }
    }

    return val;
  }

  /**
   * Copies a regex object. Allows adding and removing native flags while
   * copying the regex.
   *
   * @private
   * @function
   * @param {RegExp} regex Regex to copy.
   * @param {Object} [options] Allows specifying native flags to add or
   *                           remove while copying the regex.
   * @returns {RegExp} Copy of the provided regex, possibly with modified
   *                   flags.
   */
  function $copyRegExp(regex, options) {
    var flags,
      opts,
      rx;

    if (options !== null && typeof options === 'object') {
      opts = options;
    } else {
      opts = {};
    }

    // Get native flags in use (the letters after the final "/")
    flags = getNativeFlags.exec($toString(regex))[1];
    flags = $onlyCoercibleToString(flags);
    if (opts.add) {
      flags += opts.add;
      flags = flags.replace(clipDups, '');
    }

    if (opts.remove) {
      // Would need to escape `options.remove` if this was public
      rx = new RegExp('[' + opts.remove + ']+', 'g');
      flags = flags.replace(rx, '');
    }

    return new RegExp(regex.source, flags);
  }

  /**
   * The abstract operation ToLength converts its argument to an integer
   * suitable for use as the length of an array-like object.
   *
   * @private
   * @function
   * @param {*} inputArg The object to be converted to a length.
   * @returns {number} The integer value clamped to [0, 2^53-1].
   */
  function $toLength(inputArg) {
    return Math.min(Math.max($toInteger(inputArg), 0), MAX_SAFE_INTEGER);
  }

  /**
   * Copies a regex object so that it is suitable for use with searchOf and
   * searchLastOf methods: the global flag is added (so lastIndex is
   * honoured) and the sticky flag is removed.
   *
   * @private
   * @function
   * @param {RegExp} regex Regex to copy.
   * @returns {RegExp}
   */
  function $toSearchRegExp(regex) {
    return $copyRegExp(regex, {
      add: 'g',
      remove: 'y'
    });
  }

  /**
   * Returns true if the operand inputArg is a member of one of the types
   * Undefined, Null, Boolean, Number, Symbol, or String.
   *
   * @private
   * @function
   * @param {*} inputArg
   * @returns {boolean}
   */
  function $isPrimitive(inputArg) {
    var type = typeof inputArg;

    return type === 'undefined' ||
      inputArg === null ||
      type === 'boolean' ||
      type === 'string' ||
      type === 'number' ||
      type === 'symbol';
  }

  /**
   * The abstract operation converts its argument to a value of type Object.
   *
   * @private
   * @function
   * @param {*} inputArg The argument to be converted to an object.
   * @throws {TypeError} If inputArg is not coercible to an object.
   * @returns {Object} Value of inputArg as type Object.
   */
  function $toObject(inputArg) {
    var object;

    if ($isPrimitive($requireObjectCoercible(inputArg))) {
      object = Object(inputArg);
    } else {
      object = inputArg;
    }

    return object;
  }

  /**
   * Converts a single argument that is an array-like object or list (eg.
   * arguments) into a new Array and returns it.
   *
   * @private
   * @function
   * @param {Object} arrayLike
   * @returns {Array}
   */
  function $toArray(arrayLike) {
    var object = $toObject(arrayLike),
      length = $toLength(object.length),
      array = [],
      index = 0;

    array.length = length;
    while (index < length) {
      array[index] = object[index];
      index += 1;
    }

    return array;
  }

  if (!String.prototype.searchOf) {
    /**
     * This method returns the index within the calling String object of
     * the first occurrence of the specified value, starting the search at
     * fromIndex. Returns -1 if the value is not found.
     *
     * @function
     * @this {string}
     * @param {RegExp|string} regex A regular expression object or a String.
     *                              Anything else is implicitly converted to
     *                              a String.
     * @param {Number} [fromIndex] The location within the calling string
     *                             to start the search from. The default
     *                             value is 0, and negative or NaN values
     *                             are treated as 0. If fromIndex is greater
     *                             than str.length, -1 is returned.
     * @returns {Number} If successful, returns the index of the first
     *                   match of the regular expression inside the
     *                   string. Otherwise, it returns -1.
     */
    $defineProperty(String.prototype, 'searchOf', {
      enumerable: false,
      configurable: true,
      writable: true,
      value: function (regex) {
        var str = $onlyCoercibleToString(this),
          args = $toArray(arguments),
          fromIndex = 0,
          match,
          rx;

        if (!$isRegExp(regex)) {
          return String.prototype.indexOf.apply(str, args);
        }

        if (args.length > 1) {
          fromIndex = Math.max($toInteger(args[1]), 0);
        }

        // fromIndex === length is still searched so that a pattern which
        // can match the empty string is found at the very end.
        if (fromIndex > str.length) {
          return -1;
        }

        rx = $toSearchRegExp(regex);
        rx.lastIndex = fromIndex;
        match = rx.exec(str);

        return match ? +match.index : -1;
      }
    });
  }

  if (!String.prototype.searchLastOf) {
    /**
     * This method returns the index within the calling String object of
     * the last occurrence of the specified value, or -1 if not found.
     * Only matches that start at or before fromIndex are considered.
     *
     * @function
     * @this {string}
     * @param {RegExp|string} regex A regular expression object or a String.
     *                              Anything else is implicitly converted to
     *                              a String.
     * @param {Number} [fromIndex] Optional. The highest index at which a
     *                             match may start. The default value is
     *                             str.length, which is also used when the
     *                             value converts to NaN (including
     *                             undefined). If it is negative, it is
     *                             treated as 0. If fromIndex > str.length,
     *                             fromIndex is treated as str.length.
     * @returns {Number} If successful, returns the index of the last
     *                   match of the regular expression inside the
     *                   string. Otherwise, it returns -1.
     */
    $defineProperty(String.prototype, 'searchLastOf', {
      enumerable: false,
      configurable: true,
      writable: true,
      value: function (regex) {
        var str = $onlyCoercibleToString(this),
          args = $toArray(arguments),
          result = -1,
          fromIndex,
          length,
          match,
          pos,
          rx;

        if (!$isRegExp(regex)) {
          return String.prototype.lastIndexOf.apply(str, args);
        }

        length = str.length;
        fromIndex = args.length > 1 ? +args[1] : length;
        if (!$strictEqual(fromIndex, fromIndex)) {
          // NaN (e.g. an explicit undefined) means "search everything",
          // mirroring String.prototype.lastIndexOf.
          fromIndex = length;
        }

        fromIndex = Math.min(Math.max($toInteger(fromIndex), 0), length);

        pos = 0;
        rx = $toSearchRegExp(regex);
        while (pos <= fromIndex) {
          rx.lastIndex = pos;
          match = rx.exec(str);
          if (!match) {
            break;
          }

          pos = +match.index;
          if (pos <= fromIndex) {
            result = pos;
          }

          // Continue one past the start of this match.
          pos += 1;
        }

        return result;
      }
    });
  }
}());
_x000D_
(function () {
  'use strict';

  /*
   * testing as follow to make sure that at least for one character regexp,
   * the result is the same as if we used indexOf
   */

  // Output element; the page is expected to contain <pre id="out"></pre>.
  var pre = document.getElementById('out');

  /**
   * Appends one line of text to the output element.
   *
   * @param {*} result Value to print; it is converted by string
   *                   concatenation.
   */
  function log(result) {
    pre.appendChild(document.createTextNode(result + '\n'));
  }

  /**
   * Compares searchOf/searchLastOf against indexOf/lastIndexOf for every
   * start index from str.length + 2 down to 0 and logs one Passed/Failed
   * line per comparison.
   *
   * @param {string} str String to search for the character "a".
   */
  function test(str) {
    var i,
      r,
      a,
      b;

    // Count down to and including 0 so the first position is tested too.
    for (i = str.length + 2; i >= 0; i -= 1) {
      a = str.indexOf('a', i);
      b = str.searchOf(/a/, i);
      r = ['Failed', 'searchOf', str, i, a, b];
      if (a === b) {
        r[0] = 'Passed';
      }

      log(r);
      a = str.lastIndexOf('a', i);
      b = str.searchLastOf(/a/, i);
      r = ['Failed', 'searchLastOf', str, i, a, b];
      if (a === b) {
        r[0] = 'Passed';
      }

      log(r);
    }
  }

  /*
   * Look for the a among the xes
   */

  test('xxx');
  test('axx');
  test('xax');
  test('xxa');
  test('axa');
  test('xaa');
  test('aax');
  test('aaa');
}());
<pre id="out"></pre>
RegExp instances already have a lastIndex property (which is honoured when they are global), so what I'm doing is copying the regular expression, modifying it slightly to suit our purposes, exec-ing it on the string and looking at the lastIndex. This will inevitably be faster than looping on the string. (You have enough examples of how to put this onto the string prototype, right?)
/**
 * Returns the index of the first match of `reIn` in `str` at or after
 * `startIndex`, or -1 when there is none.
 *
 * @param {RegExp} reIn Pattern to search for; it is copied, not modified.
 * @param {string} str Text to search.
 * @param {number} [startIndex=0] Position at which the search starts.
 * @returns {number}
 */
function reIndexOf(reIn, str, startIndex) {
  // The flag property is `multiline` (lower-case l); reading `multiLine`
  // yields undefined and silently drops the m flag from the copy.
  var re = new RegExp(reIn.source, 'g' + (reIn.ignoreCase ? 'i' : '') + (reIn.multiline ? 'm' : ''));
  re.lastIndex = startIndex || 0;
  var res = re.exec(str);
  if (!res) return -1;
  return res.index;
}
/**
 * Returns the index of the last match of `reIn` in `str`, or -1.
 *
 * The pattern is extended with a negative look-ahead that rejects any match
 * followed by another match. Note that `startIndex` is assigned to
 * lastIndex, i.e. it is where the forward scan begins, so the last match is
 * only reported when it starts at or after `startIndex`.
 *
 * @param {RegExp} reIn Pattern to search for; it is copied, not modified.
 * @param {string} str Text to search.
 * @param {number} [startIndex=0] Position at which the scan starts.
 * @returns {number}
 */
function reLastIndexOf(reIn, str, startIndex) {
  var source = reIn.source;
  // A pattern ending in an unescaped `$` is already pinned to the end of the
  // string, unless the m flag lets `$` match at every line end.
  var anchoredAtEnd = /\$$/.test(source) && !/\\\$$/.test(source) && !reIn.multiline;
  // Group the source on both sides so a top-level alternation (a|b) is not
  // split apart by the appended look-ahead.
  var src = anchoredAtEnd ? source : '(?:' + source + ')(?![\\S\\s]*(?:' + source + '))';
  var re = new RegExp(src, 'g' + (reIn.ignoreCase ? 'i' : '') + (reIn.multiline ? 'm' : ''));
  re.lastIndex = startIndex || 0;
  var res = re.exec(str);
  if (!res) return -1;
  return res.index;
}
reIndexOf(/[abc]/, "tommy can eat"); // Returns 6
reIndexOf(/[abc]/, "tommy can eat", 8); // Returns 11
reLastIndexOf(/[abc]/, "tommy can eat"); // Returns 11
You could also prototype the functions onto the RegExp object:
/**
 * Returns the index of the first match of this pattern in `str` at or after
 * `startIndex`, or -1 when there is none.
 *
 * @param {string} str Text to search.
 * @param {number} [startIndex=0] Position at which the search starts.
 * @returns {number}
 */
RegExp.prototype.indexOf = function(str, startIndex) {
  // `multiline` is the real flag property; `multiLine` is always undefined
  // and would drop the m flag from the copy.
  var re = new RegExp(this.source, 'g' + (this.ignoreCase ? 'i' : '') + (this.multiline ? 'm' : ''));
  re.lastIndex = startIndex || 0;
  var res = re.exec(str);
  if (!res) return -1;
  return res.index;
};
/**
 * Returns the index of the last match of this pattern in `str`, or -1.
 *
 * The pattern is extended with a negative look-ahead that rejects any match
 * followed by another match. `startIndex` is assigned to lastIndex, i.e. it
 * is where the forward scan begins, so the last match is only reported when
 * it starts at or after `startIndex`.
 *
 * @param {string} str Text to search.
 * @param {number} [startIndex=0] Position at which the scan starts.
 * @returns {number}
 */
RegExp.prototype.lastIndexOf = function(str, startIndex) {
  var source = this.source;
  // A pattern ending in an unescaped `$` is already pinned to the end of the
  // string, unless the m flag lets `$` match at every line end.
  var anchoredAtEnd = /\$$/.test(source) && !/\\\$$/.test(source) && !this.multiline;
  // Group the source on both sides so a top-level alternation (a|b) is not
  // split apart by the appended look-ahead.
  var src = anchoredAtEnd ? source : '(?:' + source + ')(?![\\S\\s]*(?:' + source + '))';
  var re = new RegExp(src, 'g' + (this.ignoreCase ? 'i' : '') + (this.multiline ? 'm' : ''));
  re.lastIndex = startIndex || 0;
  var res = re.exec(str);
  if (!res) return -1;
  return res.index;
};
/[abc]/.indexOf("tommy can eat"); // Returns 6
/[abc]/.indexOf("tommy can eat", 8); // Returns 11
/[abc]/.lastIndexOf("tommy can eat"); // Returns 11
A quick explanation of how I am modifying the RegExp: for indexOf I just have to ensure that the global flag is set. For lastIndexOf I am using a negative look-ahead to find the last occurrence, unless the RegExp was already anchored to the end of the string.
Jason Bunting's last index of does not work. Mine is not optimal, but it works.
//Jason Bunting's
/**
 * Regex-aware counterpart of String.prototype.indexOf.
 *
 * @param {RegExp} regex Pattern handed to String.prototype.search.
 * @param {number} [startpos=0] Start position, clamped to [0, length].
 * @returns {number} Index of the first match at or after startpos, or -1.
 */
String.prototype.regexIndexOf = function(regex, startpos) {
  var text = String(this);
  // substring() clamps its argument, so clamp the same way here; otherwise
  // a negative or oversized startpos would be added to the result unchanged.
  var start = Math.min(Math.max(Math.floor(Number(startpos)) || 0, 0), text.length);
  var indexOf = text.substring(start).search(regex);
  return (indexOf >= 0) ? (indexOf + start) : indexOf;
};
/**
 * Regex-aware counterpart of String.prototype.lastIndexOf, built on
 * this.regexIndexOf: walks the matches from the left and keeps the last one
 * that starts at or before startpos.
 *
 * @param {RegExp} regex Pattern to search for.
 * @param {number} [startpos] Highest index at which a match may start;
 *     defaults to the string length, negative values count as 0.
 * @returns {number} Index of the last such match, or -1.
 */
String.prototype.regexLastIndexOf = function(regex, startpos) {
  var lastIndex = -1;
  var index = this.regexIndexOf( regex );
  startpos = startpos === undefined ? this.length : startpos;
  if ( startpos < 0 ) startpos = 0;
  // "<=": like lastIndexOf, a match that starts exactly at startpos counts.
  while ( index >= 0 && index <= startpos )
  {
    lastIndex = index;
    index = this.regexIndexOf( regex, index + 1 );
  }
  return lastIndex;
};
Based on BaileyP's answer. The main difference is that these methods return -1
if the pattern can't be matched.
Edit: Thanks to Jason Bunting's answer I got an idea. Why not modify the .lastIndex
property of the regex? Though this will only work for patterns with the global flag (/g
).
Edit: Updated to pass the test-cases.
/**
 * RegExp-aware String#indexOf built on RegExp#lastIndex.
 *
 * @param {RegExp} re - Pattern to search for; never mutated.
 * @param {number} [startPos=0] - Position at which the search begins.
 * @returns {number} Index of the first match at or after startPos, or -1.
 */
String.prototype.regexIndexOf = function(re, startPos) {
    startPos = startPos || 0;
    if (re.global) {
        // Work on a private copy so the caller's regex keeps its own lastIndex.
        re = new RegExp(re);
    } else {
        // lastIndex is only honoured by exec() when the global flag is set.
        var flags = "g" + (re.multiline?"m":"") + (re.ignoreCase?"i":"");
        re = new RegExp(re.source, flags);
    }
    re.lastIndex = startPos;
    var match = re.exec(this);
    return match ? match.index : -1;
}
/**
 * RegExp-aware String#lastIndexOf built on RegExp#lastIndex.
 *
 * Scans forward from the start of the string and remembers the last match
 * that begins at or before startPos.
 *
 * @param {RegExp} re - Pattern to search for; never mutated.
 * @param {number} [startPos=this.length] - Highest index a match may start at.
 * @returns {number} Index of the last qualifying match, or -1.
 */
String.prototype.regexLastIndexOf = function(re, startPos) {
    startPos = startPos === undefined ? this.length : startPos;
    if (re.global) {
        // Work on a private copy so the caller's regex keeps its own lastIndex.
        re = new RegExp(re);
    } else {
        // lastIndex is only honoured by exec() when the global flag is set.
        var flags = "g" + (re.multiline?"m":"") + (re.ignoreCase?"i":"");
        re = new RegExp(re.source, flags);
    }
    var lastSuccess = -1;
    // pos jumps to each match start; the loop's pos++ then resumes one past it,
    // so overlapping and zero-width matches are both visited.
    for (var pos = 0; pos <= startPos; pos++) {
        re.lastIndex = pos;
        var match = re.exec(this);
        if (!match) break;
        pos = match.index;
        if (pos <= startPos) lastSuccess = pos;
    }
    return lastSuccess;
}
// Minimal existence check: exec() returns null when the pattern does not
// occur anywhere in the string, so a non-null result means "found".
var mystring = "abc ab a";
var re = new RegExp("ab"); // any regex here
if ( re.exec(mystring) != null ){
alert("matches"); // true in this case
}
Use standard regular expressions:
var re = new RegExp("^ab"); // At front
var re = new RegExp("ab$"); // At end
var re = new RegExp("ab(c|d)"); // abc or abd
I needed a regexIndexOf
function for an array as well, so I wrote one myself. I doubt that it is optimized, but it should work properly.
/**
 * Finds the first array element, at or after `startpos`, whose string form
 * matches `regex`. Holes and `undefined` entries are skipped.
 *
 * @param {RegExp} regex - Pattern tested against each element's string form.
 * @param {number} [startpos=0] - Index at which the scan begins.
 * @returns {number} Index of the first matching element, or -1.
 */
Array.prototype.regexIndexOf = function (regex, startpos = 0) {
    // Declared locally: the undeclared `len`/`x` leaked into the global scope
    // and throw a ReferenceError in strict mode and ES modules.
    const len = this.length;
    for (let x = startpos; x < len; x++) {
        if (typeof this[x] != 'undefined' && ('' + this[x]).match(regex)) {
            return x;
        }
    }
    return -1;
}
// Demo data. Declared with `var` so the snippet no longer relies on an
// implicit global. Index 2 stays a hole because index 3 is assigned directly.
var arr = [];
arr.push(null);
arr.push(NaN);
arr[3] = 7;
arr.push('asdf');
arr.push('qwer');
arr.push(9);
arr.push('...');
console.log(arr);
arr.regexIndexOf(/\d/, 4); // 6: the 9 is the first element from index 4 on that contains a digit
RegExp
instances have a lastIndex property already (if they are global) and so what I'm doing is copying the regular expression, modifying it slightly to suit our purposes, exec
-ing it on the string and looking at the lastIndex
. This will inevitably be faster than looping on the string. (You have enough examples of how to put this onto the string prototype, right?)
/**
 * Finds the index of the first match of a regular expression in a string,
 * starting the search at a given position (a RegExp-aware String#indexOf).
 *
 * @param {RegExp} reIn - Pattern to search for; it is copied, never mutated.
 * @param {string} str - Text to search.
 * @param {number} [startIndex=0] - Position at which the search begins.
 * @returns {number} Index of the first match at or after startIndex, or -1.
 */
function reIndexOf(reIn, str, startIndex) {
    // The property is `multiline`; `multiLine` is always undefined and dropped the m flag.
    var re = new RegExp(reIn.source, 'g' + (reIn.ignoreCase ? 'i' : '') + (reIn.multiline ? 'm' : ''));
    re.lastIndex = startIndex || 0;
    var res = re.exec(str);
    if (!res) return -1;
    return res.index;
}
/**
 * Finds the index of the last match of a regular expression in a string.
 *
 * The pattern is copied (never mutated) and extended with a negative
 * look-ahead so that only a match with no later match after it succeeds.
 * Note that startIndex is where the forward scan begins (a lower bound),
 * not an upper bound as in String#lastIndexOf.
 *
 * @param {RegExp} reIn - Pattern to search for.
 * @param {string} str - Text to search.
 * @param {number} [startIndex=0] - Position at which the scan begins.
 * @returns {number} Index of the last match at or after startIndex, or -1.
 */
function reLastIndexOf(reIn, str, startIndex) {
    var source = reIn.source;
    // A pattern ending in an unescaped `$` is already pinned to the end of the
    // string, but only when `$` cannot also match at line ends (no m flag).
    var alreadyAnchored = !reIn.multiline && /\$$/.test(source) && !/\\\$$/.test(source);
    // Group the source so alternations such as `a|b` stay intact when the
    // look-ahead is appended.
    var src = alreadyAnchored ? source : '(?:' + source + ')(?![\\S\\s]*(?:' + source + '))';
    // The property is `multiline`; `multiLine` is always undefined and dropped the m flag.
    var re = new RegExp(src, 'g' + (reIn.ignoreCase ? 'i' : '') + (reIn.multiline ? 'm' : ''));
    re.lastIndex = startIndex || 0;
    var res = re.exec(str);
    if (!res) return -1;
    return res.index;
}
reIndexOf(/[abc]/, "tommy can eat"); // Returns 6
reIndexOf(/[abc]/, "tommy can eat", 8); // Returns 11
reLastIndexOf(/[abc]/, "tommy can eat"); // Returns 11
You could also prototype the functions onto the RegExp object:
/**
 * Finds the index of the first match of this pattern in a string, starting
 * the search at a given position. The receiver is copied, never mutated.
 *
 * @param {string} str - Text to search.
 * @param {number} [startIndex=0] - Position at which the search begins.
 * @returns {number} Index of the first match at or after startIndex, or -1.
 */
RegExp.prototype.indexOf = function(str, startIndex) {
    // The property is `multiline`; `multiLine` is always undefined and dropped the m flag.
    var re = new RegExp(this.source, 'g' + (this.ignoreCase ? 'i' : '') + (this.multiline ? 'm' : ''));
    re.lastIndex = startIndex || 0;
    var res = re.exec(str);
    if (!res) return -1;
    return res.index;
};
/**
 * Finds the index of the last match of this pattern in a string.
 *
 * The receiver is copied (never mutated) and extended with a negative
 * look-ahead so that only a match with no later match after it succeeds.
 * startIndex is where the forward scan begins (a lower bound), not an upper
 * bound as in String#lastIndexOf.
 *
 * @param {string} str - Text to search.
 * @param {number} [startIndex=0] - Position at which the scan begins.
 * @returns {number} Index of the last match at or after startIndex, or -1.
 */
RegExp.prototype.lastIndexOf = function(str, startIndex) {
    var source = this.source;
    // A trailing unescaped `$` already pins the match to the end of the string,
    // but only when `$` cannot also match at line ends (no m flag).
    var alreadyAnchored = !this.multiline && /\$$/.test(source) && !/\\\$$/.test(source);
    // Group the source so alternations such as `a|b` survive the appended look-ahead.
    var src = alreadyAnchored ? source : '(?:' + source + ')(?![\\S\\s]*(?:' + source + '))';
    // The property is `multiline`; `multiLine` is always undefined and dropped the m flag.
    var re = new RegExp(src, 'g' + (this.ignoreCase ? 'i' : '') + (this.multiline ? 'm' : ''));
    re.lastIndex = startIndex || 0;
    var res = re.exec(str);
    if (!res) return -1;
    return res.index;
};
/[abc]/.indexOf("tommy can eat"); // Returns 6
/[abc]/.indexOf("tommy can eat", 8); // Returns 11
/[abc]/.lastIndexOf("tommy can eat"); // Returns 11
A quick explanation of how I am modifying the RegExp
: For indexOf
I just have to ensure that the global flag is set. For lastIndexOf
I am using a negative look-ahead to find the last occurrence unless the RegExp
was already matching at the end of the string.
Jason Bunting's last index of does not work. Mine is not optimal, but it works.
//Jason Bunting's
/**
 * RegExp-aware String#indexOf: index of the first match of `regex` at or
 * after `startpos`, or -1 when there is none.
 *
 * @param {RegExp} regex - Pattern to search for.
 * @param {number} [startpos=0] - Position at which the search begins.
 * @returns {number} Index in the original string, or -1.
 */
String.prototype.regexIndexOf = function(regex, startpos) {
    // Clamp to [0, length] and drop fractions, as String#indexOf does; otherwise
    // substring() clamps internally while the raw value is added back below.
    var start = Math.min(Math.max(Math.floor(startpos) || 0, 0), this.length);
    var indexOf = this.substring(start).search(regex);
    return (indexOf >= 0) ? (indexOf + start) : indexOf;
}
/**
 * RegExp-aware String#lastIndexOf: index of the last match of `regex` that
 * starts at or before `startpos`, or -1 when there is none.
 *
 * Relies on String.prototype.regexIndexOf being installed.
 *
 * @param {RegExp} regex - Pattern to search for.
 * @param {number} [startpos=this.length] - Highest index a match may start at.
 * @returns {number} Index of the last qualifying match, or -1.
 */
String.prototype.regexLastIndexOf = function(regex, startpos) {
    var lastIndex = -1;
    var index = this.regexIndexOf( regex );
    // Cap at length so a zero-width pattern cannot keep "matching" past the end.
    startpos = startpos === undefined ? this.length : Math.min(startpos, this.length);
    // `<=` because, like String#lastIndexOf, a match starting exactly at startpos counts.
    while ( index >= 0 && index <= startpos )
    {
        lastIndex = index;
        index = this.regexIndexOf( regex, index + 1 );
    }
    return lastIndex;
}
For data with sparse matches, using string.search is the fastest across browsers. It re-slices the string on each iteration:
/**
 * Finds the index of the last regex match by repeatedly calling
 * String#search on the remainder of the string after each match.
 *
 * @param {string} string - Text to search.
 * @param {RegExp} regex - Pattern to search for.
 * @param {number} [index] - When given, only the first `index` characters are
 *   searched (the character at `index` itself is excluded).
 * @returns {number} Index of the last match, or -1.
 */
function lastIndexOfSearch(string, regex, index) {
    if (index === 0 || index)
        string = string.slice(0, Math.max(0, index));
    var idx;
    // Starts at -1 so that adding (idx + 1) per match yields absolute positions.
    var offset = -1;
    while ((idx = string.search(regex)) !== -1) {
        offset += idx + 1;
        // A zero-width match at the very end leaves nothing to slice off; without
        // this guard an empty-matching pattern (e.g. /x*/) loops forever.
        if (idx >= string.length) break;
        string = string.slice(idx + 1);
    }
    return offset;
}
For dense data I made this. It's complex compared to the execute method, but for dense data, it's 2-10x faster than every other method I tried, and about 100x faster than the accepted solution. The main points are:
The new regex is executed and the results from either that exec, or the first exec, are returned;
/**
 * Finds the index of the last regex match using a derived "greedy prefix"
 * pattern that is cached per source/flags combination.
 *
 * @param {string} string - Text to search.
 * @param {RegExp} regex - Pattern to search for; its lastIndex is reset to 0.
 * @param {number} [index] - When given, only matches inside the first
 *   `index + 1` characters are considered.
 * @returns {number} Index of the last match, or -1.
 */
function lastIndexOfGroupSimple(string, regex, index) {
    if (index === 0 || index) string = string.slice(0, Math.max(0, index + 1));
    regex.lastIndex = 0;
    // One declaration list: the original lacked a comma after `index`, so
    // flags/key/match became implicit globals (a ReferenceError in strict mode).
    var flags = 'g' + (regex.multiline ? 'm' : '') + (regex.ignoreCase ? 'i' : ''),
        key = regex.source + '$' + flags,
        match = regex.exec(string),
        lastRegex,
        firstIndex;
    // Cheap early exit: no first match means there is no last match either.
    if (!match) return -1;
    if (lastIndexOfGroupSimple.cache === undefined) lastIndexOfGroupSimple.cache = {};
    lastRegex = lastIndexOfGroupSimple.cache[key];
    if (!lastRegex)
        // [\s\S] instead of `.` so the greedy prefix can cross line breaks; the
        // look-ahead copy is grouped so alternations in the source stay intact.
        lastIndexOfGroupSimple.cache[key] = lastRegex = new RegExp('[\\s\\S]*(' + regex.source + ')(?![\\s\\S]*?(?:' + regex.source + '))', flags);
    firstIndex = match.index;
    // Start the greedy scan at the first match; nothing before it can matter.
    lastRegex.lastIndex = match.index;
    return (match = lastRegex.exec(string)) ? lastRegex.lastIndex - match[1].length : firstIndex;
}
I don't understand the purpose of the tests up top. Situations that require a regex are impossible to compare against a call to indexOf, which I think is the point of making the method in the first place. To get the test to pass, it makes more sense to use 'xxx+(?!x)', than adjust the way the regex iterates.
Instances of the String
constructor have a .search()
method which accepts a RegExp and returns the index of the first match.
To start the search from a particular position (faking the second parameter of .indexOf()
) you can slice
off the first i
characters:
str.slice(i).search(/re/)
But this will get the index in the shorter string (after the first part was sliced off) so you'll want to then add the length of the chopped off part (i
) to the returned index if it wasn't -1
. This will give you the index in the original string:
/**
 * RegExp-aware indexOf: index of the first match of `re` in `text` at or
 * after position `i`, or -1 when there is none.
 *
 * @param {string} text - Text to search.
 * @param {RegExp} re - Pattern to search for.
 * @param {number} [i=0] - Position at which the search begins.
 * @returns {number} Index in the original text, or -1.
 */
function regexIndexOf(text, re, i) {
    // Default to 0 (an omitted `i` used to produce NaN) and clamp to [0, length]:
    // a negative value would make slice() count from the end of the string.
    var start = Math.min(Math.max(Math.floor(i) || 0, 0), text.length);
    var indexInSuffix = text.slice(start).search(re);
    return indexInSuffix < 0 ? indexInSuffix : indexInSuffix + start;
}
If you are looking for a very simple lastIndex lookup with RegExp and don't care if it mimics lastIndexOf to the last detail, this may catch your attention.
I simply reverse the string, and subtract the first occurrence index from length - 1. It happens to pass my test, but I think there could be a performance issue with long strings.
// Augments the built-in String interface so TypeScript accepts the two
// prototype extensions assigned below.
interface String {
// Returns the string with its characters in reverse order.
reverse(): string;
// Intended to return the index of the last match of `regex`, or -1 when nothing matches.
lastIndex(regex: RegExp): number;
}
// Builds the reversed string by walking the UTF-16 code units back to front.
String.prototype.reverse = function(this: string) {
  let flipped = "";
  for (let i = this.length - 1; i >= 0; i--) {
    flipped += this.charAt(i);
  }
  return flipped;
};
/**
 * Returns the index at which the last match of `regex` starts, or -1 when the
 * pattern does not occur.
 *
 * The string is scanned forward with a private global copy of the pattern.
 * Matching against the reversed text only worked for single-character
 * patterns, and a caller-supplied /g regex carried its lastIndex from one
 * call to the next.
 */
String.prototype.lastIndex = function(this: string, regex: RegExp) {
  const flags = regex.flags.includes("g") ? regex.flags : regex.flags + "g";
  const scanner = new RegExp(regex.source, flags);
  let last = -1;
  let match: RegExpExecArray | null;
  while ((match = scanner.exec(this)) !== null) {
    last = match.index;
    // A zero-width match does not advance lastIndex by itself.
    if (match[0].length === 0) {
      scanner.lastIndex++;
    }
  }
  return last;
};
Well, as you are just looking to match the position of a character , regex is possibly overkill.
I presume all you want is, instead of "find the first occurrence of this character", to find the first occurrence of any of these characters.
This of course is the simple answer, but does what your question sets out to do, albeit without the regex part ( because you didn't clarify why specifically it had to be a regex )
/**
 * Finds the lowest index, at or after `offset`, at which any of the given
 * characters (or substrings) occurs in `str`.
 *
 * @param {string} str - Text to search.
 * @param {string[]} chars - Candidates; each is looked up with String#indexOf.
 * @param {number} [offset] - Position at which each lookup begins.
 * @returns {number} Smallest index found for any candidate, or -1 if none occurs.
 */
function mIndexOf( str , chars, offset )
{
    var first = -1;
    for( var i = 0; i < chars.length; i++ )
    {
        var p = str.indexOf( chars[i] , offset );
        // Ignore misses: a -1 used to compare as "smaller" and wiped out a hit
        // recorded for an earlier candidate.
        if( p !== -1 && ( first === -1 || p < first ) )
        {
            first = p;
        }
    }
    return first;
}
// Method form of mIndexOf that searches the receiving string.
String.prototype.mIndexOf = function( chars, offset )
{
    return mIndexOf( this, chars, offset ); // I'm really averse to monkey patching.
};
// Indexes in "hello world": h0 e1 l2 l3 o4 (space)5 w6 o7 r8 l9 d10
mIndexOf( "hello world", ['a','o','w'], 0 );
// >> 4
mIndexOf( "hello world", ['a'], 0 );
// >> -1
mIndexOf( "hello world", ['a','o','w'], 4 );
// >> 4
mIndexOf( "hello world", ['a','o','w'], 5 );
// >> 6
mIndexOf( "hello world", ['a','o','w'], 7 );
// >> 7
mIndexOf( "hello world", ['a','o','w','d'], 7 );
// >> 7
mIndexOf( "hello world", ['a','o','w','d'], 10 );
// >> 10
mIndexOf( "hello world", ['a','o','w','d'], 11 );
// >> -1
After having all the proposed solutions fail my tests one way or the other, (edit: some were updated to pass the tests after I wrote this) I found the mozilla implementation for Array.indexOf and Array.lastIndexOf
I used those to implement my version of String.prototype.regexIndexOf and String.prototype.regexLastIndexOf as follows:
/**
 * Character-by-character indexOf: index of the first single character, at or
 * after `from`, that matches `elt`. Only meaningful for patterns that match
 * one character (e.g. [abc], \s, \W).
 *
 * @param {RegExp} elt - Pattern tested against each individual character.
 * @param {number} [from=0] - Optional second argument: start index; a
 *   negative value counts back from the end of the string.
 * @returns {number} Index of the first matching character, or -1.
 */
String.prototype.regexIndexOf = function(elt /*, from*/)
{
    var arr = this.split('');
    var len = arr.length;
    var from = Number(arguments[1]) || 0;
    from = (from < 0) ? Math.ceil(from) : Math.floor(from);
    if (from < 0)
        from += len;
    for (; from < len; from++) {
        // search() ignores and preserves lastIndex, so a /g pattern behaves the
        // same on every character and across calls (exec() is stateful there).
        if (from in arr && arr[from].search(elt) !== -1)
            return from;
    }
    return -1;
};
/**
 * Character-by-character lastIndexOf: index of the last single character, at
 * or before `from`, that matches `elt`. Only meaningful for patterns that
 * match one character (e.g. [abc], \s, \W).
 *
 * @param {RegExp} elt - Pattern tested against each individual character.
 * @param {number} [from=length-1] - Optional second argument: index at which
 *   the backwards scan begins; negative values count back from the end.
 * @returns {number} Index of the last matching character, or -1.
 */
String.prototype.regexLastIndexOf = function(elt /*, from*/)
{
    var arr = this.split('');
    var len = arr.length;
    var from = Number(arguments[1]);
    if (isNaN(from)) {
        from = len - 1;
    } else {
        from = (from < 0) ? Math.ceil(from) : Math.floor(from);
        if (from < 0)
            from += len;
        else if (from >= len)
            from = len - 1;
    }
    for (; from > -1; from--) {
        // search() ignores and preserves lastIndex, so a /g pattern behaves the
        // same on every character and across calls (exec() is stateful there).
        if (from in arr && arr[from].search(elt) !== -1)
            return from;
    }
    return -1;
};
They seem to pass the test functions I provided in the question.
Obviously they only work if the regular expression matches one character but that is enough for my purpose since I will be using it for things like ( [abc] , \s , \W , \D )
I will keep monitoring the question in case someone provides a better/faster/cleaner/more generic implementation that works on any regular expression.
Well, as you are just looking to match the position of a character , regex is possibly overkill.
I presume all you want is, instead of "find the first occurrence of this character", to find the first occurrence of any of these characters.
This of course is the simple answer, but does what your question sets out to do, albeit without the regex part ( because you didn't clarify why specifically it had to be a regex )
/**
 * Finds the lowest index, at or after `offset`, at which any of the given
 * characters (or substrings) occurs in `str`.
 *
 * @param {string} str - Text to search.
 * @param {string[]} chars - Candidates; each is looked up with String#indexOf.
 * @param {number} [offset] - Position at which each lookup begins.
 * @returns {number} Smallest index found for any candidate, or -1 if none occurs.
 */
function mIndexOf( str , chars, offset )
{
    var first = -1;
    for( var i = 0; i < chars.length; i++ )
    {
        var p = str.indexOf( chars[i] , offset );
        // Ignore misses: a -1 used to compare as "smaller" and wiped out a hit
        // recorded for an earlier candidate.
        if( p !== -1 && ( first === -1 || p < first ) )
        {
            first = p;
        }
    }
    return first;
}
// Method form of mIndexOf that searches the receiving string.
String.prototype.mIndexOf = function( chars, offset )
{
    return mIndexOf( this, chars, offset ); // I'm really averse to monkey patching.
};
// Indexes in "hello world": h0 e1 l2 l3 o4 (space)5 w6 o7 r8 l9 d10
mIndexOf( "hello world", ['a','o','w'], 0 );
// >> 4
mIndexOf( "hello world", ['a'], 0 );
// >> -1
mIndexOf( "hello world", ['a','o','w'], 4 );
// >> 4
mIndexOf( "hello world", ['a','o','w'], 5 );
// >> 6
mIndexOf( "hello world", ['a','o','w'], 7 );
// >> 7
mIndexOf( "hello world", ['a','o','w','d'], 7 );
// >> 7
mIndexOf( "hello world", ['a','o','w','d'], 10 );
// >> 10
mIndexOf( "hello world", ['a','o','w','d'], 11 );
// >> -1
I used String.prototype.match(regex)
which returns a string array of all found matches of the given regex
in the string (more info see here):
/**
 * Returns the index at which the last match of `regex` starts, considering
 * only matches that start at or before `limit`.
 *
 * @param {string} text - Text to search.
 * @param {RegExp} regex - Pattern to search for; never mutated.
 * @param {number} [limit=text.length] - Highest index a match may start at.
 * @returns {number} Index of the last qualifying match, or -1.
 */
function getLastIndex(text, regex, limit = text.length) {
  // Scan with a private global, non-sticky copy so every occurrence is visited
  // with its real position. Looking matches up again by their text cannot
  // tell identical matches apart.
  const flags = regex.flags.replace('y', '');
  const scanner = new RegExp(regex.source, flags.includes('g') ? flags : flags + 'g');

  let lastIndex = -1;
  let match;
  while ((match = scanner.exec(text)) !== null) {
    // Matches arrive in ascending order, so the first one past the limit ends the scan.
    if (match.index > limit) {
      break;
    }
    lastIndex = match.index;
    // A zero-width match does not advance lastIndex by itself.
    if (match[0].length === 0) {
      scanner.lastIndex++;
    }
  }
  return lastIndex;
}

// expect -1 as first index === 14
console.log(getLastIndex('First Sentence. Last Sentence. Unfinished', /\. /g, 10));

// expect 29
console.log(getLastIndex('First Sentence. Last Sentence. Unfinished', /\. /g));

It does not natively, but you certainly can add this functionality
<script type="text/javascript">
/**
 * RegExp-aware String#indexOf: index of the first match of `pattern` at or
 * after `startIndex`, or -1 when there is none.
 *
 * @param {RegExp} pattern - Pattern to search for.
 * @param {number} [startIndex=0] - Position at which the search begins.
 * @returns {number} Index in the original string, or -1.
 */
String.prototype.regexIndexOf = function( pattern, startIndex )
{
    // Clamp to [0, length] and drop fractions, as String#indexOf does. substr()
    // with a negative start counted from the end, so the offset added back was wrong.
    var start = Math.min( Math.max( Math.floor( startIndex ) || 0, 0 ), this.length );
    var searchResult = this.slice( start ).search( pattern );
    return ( -1 === searchResult ) ? -1 : searchResult + start;
}
/**
 * RegExp-aware String#lastIndexOf: index of the last match of `pattern` that
 * starts at or before `startIndex`, or -1 when there is none.
 *
 * Scans forward with a private global copy of the pattern. The earlier
 * reverse-and-search approach offset the result from the full string length
 * instead of the searched prefix, skipped a match at `startIndex` itself, and
 * could only work for single-character patterns.
 *
 * @param {RegExp|string} pattern - Pattern to search for; never mutated.
 * @param {number} [startIndex=this.length] - Highest index a match may start at.
 * @returns {number} Index of the last qualifying match, or -1.
 */
String.prototype.regexLastIndexOf = function( pattern, startIndex )
{
    startIndex = startIndex === undefined ? this.length : startIndex;
    var scanner = ( pattern instanceof RegExp )
        ? new RegExp( pattern.source, 'g' + ( pattern.ignoreCase ? 'i' : '' ) + ( pattern.multiline ? 'm' : '' ) )
        : new RegExp( pattern, 'g' );
    var text = String( this );
    var lastFound = -1;
    var match;
    while ( ( match = scanner.exec( text ) ) && match.index <= startIndex )
    {
        lastFound = match.index;
        // Resume one character after the match START so overlapping matches are
        // seen too; this also guarantees progress on zero-width matches.
        scanner.lastIndex = match.index + 1;
    }
    return lastFound;
}
// Returns the string with its UTF-16 code units in reverse order.
String.prototype.reverse = function()
{
    var flipped = '';
    for ( var i = this.length - 1; i >= 0; i-- )
    {
        flipped += this.charAt( i );
    }
    return flipped;
}
// Demo: shows the regex-based helpers next to the native lastIndexOf on a
// sample string; the ruler comment below gives the index of each character.
// Indexes 0123456789
var str = 'caabbccdda';
alert( [
str.regexIndexOf( /[cd]/, 4 )
, str.regexLastIndexOf( /[cd]/, 4 )
, str.regexIndexOf( /[yz]/, 4 )
, str.regexLastIndexOf( /[yz]/, 4 )
, str.lastIndexOf( 'd', 4 )
, str.regexLastIndexOf( /d/, 4 )
, str.lastIndexOf( 'd' )
, str.regexLastIndexOf( /d/ )
]
);
</script>
I didn't fully test these methods, but they seem to work so far.
You could use substr.
str.substr(i).match(/[abc]/);
You could use substr.
str.substr(i).match(/[abc]/);
RegExp
instances have a lastIndex property already (if they are global) and so what I'm doing is copying the regular expression, modifying it slightly to suit our purposes, exec
-ing it on the string and looking at the lastIndex
. This will inevitably be faster than looping on the string. (You have enough examples of how to put this onto the string prototype, right?)
/**
 * Finds the index of the first match of a regular expression in a string,
 * starting the search at a given position (a RegExp-aware String#indexOf).
 *
 * @param {RegExp} reIn - Pattern to search for; it is copied, never mutated.
 * @param {string} str - Text to search.
 * @param {number} [startIndex=0] - Position at which the search begins.
 * @returns {number} Index of the first match at or after startIndex, or -1.
 */
function reIndexOf(reIn, str, startIndex) {
    // The property is `multiline`; `multiLine` is always undefined and dropped the m flag.
    var re = new RegExp(reIn.source, 'g' + (reIn.ignoreCase ? 'i' : '') + (reIn.multiline ? 'm' : ''));
    re.lastIndex = startIndex || 0;
    var res = re.exec(str);
    if (!res) return -1;
    return res.index;
}
/**
 * Finds the index of the last match of a regular expression in a string.
 *
 * The pattern is copied (never mutated) and extended with a negative
 * look-ahead so that only a match with no later match after it succeeds.
 * Note that startIndex is where the forward scan begins (a lower bound),
 * not an upper bound as in String#lastIndexOf.
 *
 * @param {RegExp} reIn - Pattern to search for.
 * @param {string} str - Text to search.
 * @param {number} [startIndex=0] - Position at which the scan begins.
 * @returns {number} Index of the last match at or after startIndex, or -1.
 */
function reLastIndexOf(reIn, str, startIndex) {
    var source = reIn.source;
    // A pattern ending in an unescaped `$` is already pinned to the end of the
    // string, but only when `$` cannot also match at line ends (no m flag).
    var alreadyAnchored = !reIn.multiline && /\$$/.test(source) && !/\\\$$/.test(source);
    // Group the source so alternations such as `a|b` stay intact when the
    // look-ahead is appended.
    var src = alreadyAnchored ? source : '(?:' + source + ')(?![\\S\\s]*(?:' + source + '))';
    // The property is `multiline`; `multiLine` is always undefined and dropped the m flag.
    var re = new RegExp(src, 'g' + (reIn.ignoreCase ? 'i' : '') + (reIn.multiline ? 'm' : ''));
    re.lastIndex = startIndex || 0;
    var res = re.exec(str);
    if (!res) return -1;
    return res.index;
}
reIndexOf(/[abc]/, "tommy can eat"); // Returns 6
reIndexOf(/[abc]/, "tommy can eat", 8); // Returns 11
reLastIndexOf(/[abc]/, "tommy can eat"); // Returns 11
You could also prototype the functions onto the RegExp object:
/**
 * Finds the index of the first match of this pattern in a string, starting
 * the search at a given position. The receiver is copied, never mutated.
 *
 * @param {string} str - Text to search.
 * @param {number} [startIndex=0] - Position at which the search begins.
 * @returns {number} Index of the first match at or after startIndex, or -1.
 */
RegExp.prototype.indexOf = function(str, startIndex) {
    // The property is `multiline`; `multiLine` is always undefined and dropped the m flag.
    var re = new RegExp(this.source, 'g' + (this.ignoreCase ? 'i' : '') + (this.multiline ? 'm' : ''));
    re.lastIndex = startIndex || 0;
    var res = re.exec(str);
    if (!res) return -1;
    return res.index;
};
/**
 * Finds the index of the last match of this pattern in a string.
 *
 * The receiver is copied (never mutated) and extended with a negative
 * look-ahead so that only a match with no later match after it succeeds.
 * startIndex is where the forward scan begins (a lower bound), not an upper
 * bound as in String#lastIndexOf.
 *
 * @param {string} str - Text to search.
 * @param {number} [startIndex=0] - Position at which the scan begins.
 * @returns {number} Index of the last match at or after startIndex, or -1.
 */
RegExp.prototype.lastIndexOf = function(str, startIndex) {
    var source = this.source;
    // A trailing unescaped `$` already pins the match to the end of the string,
    // but only when `$` cannot also match at line ends (no m flag).
    var alreadyAnchored = !this.multiline && /\$$/.test(source) && !/\\\$$/.test(source);
    // Group the source so alternations such as `a|b` survive the appended look-ahead.
    var src = alreadyAnchored ? source : '(?:' + source + ')(?![\\S\\s]*(?:' + source + '))';
    // The property is `multiline`; `multiLine` is always undefined and dropped the m flag.
    var re = new RegExp(src, 'g' + (this.ignoreCase ? 'i' : '') + (this.multiline ? 'm' : ''));
    re.lastIndex = startIndex || 0;
    var res = re.exec(str);
    if (!res) return -1;
    return res.index;
};
/[abc]/.indexOf("tommy can eat"); // Returns 6
/[abc]/.indexOf("tommy can eat", 8); // Returns 11
/[abc]/.lastIndexOf("tommy can eat"); // Returns 11
A quick explanation of how I am modifying the RegExp
: For indexOf
I just have to ensure that the global flag is set. For lastIndexOf
I am using a negative look-ahead to find the last occurrence unless the RegExp
was already matching at the end of the string.
Source: Stackoverflow.com