I'm trying to find the positions of all occurrences of a string in another string, case-insensitive.
For example, given the string:
I learned to play the Ukulele in Lebanon.
and the search string le
, I want to obtain the array:
[2, 25, 27, 33]
Both strings will be variables - i.e., I can't hard-code their values.
I figured that this was an easy task for regular expressions, but after struggling for a while to find one that would work, I've had no luck.
I found this example of how to accomplish this using .indexOf()
, but surely there has to be a more concise way to do it?
This question is related to
javascript
regex
string
indexof
I would recommend Tim's answer. However, this comment by @blazs states "Suppose searchStr=aaa
and that str=aaaaaa
. Then instead of finding 4 occurences your code will find only 2 because you're making skips by searchStr.length in the loop.", which is true by looking at Tim's code, specifically this line here: startIndex = index + searchStrLen;
Tim's code would not be able to find an instance of the string that's being searched that is within the length of itself. So, I've modified Tim's answer:
function getIndicesOf(searchStr, str, caseSensitive) {
var startIndex = 0, index, indices = [];
if (!caseSensitive) {
str = str.toLowerCase();
searchStr = searchStr.toLowerCase();
}
while ((index = str.indexOf(searchStr, startIndex)) > -1) {
indices.push(index);
startIndex = index + 1;
}
return indices;
}
var searchStr = prompt("Enter a string.");
var str = prompt("What do you want to search for in the string?");
var indices = getIndicesOf(str, searchStr);
document.getElementById("output").innerHTML = indices + "";
_x000D_
<div id="output"></div>
_x000D_
Changing it to + 1
instead of + searchStrLen
will allow the index 1 to be in the indices array if I have an str of aaaaaa
and a searchStr of aaa
.
P.S. If anyone would like comments in the code to explain how the code works, please say so, and I'll be happy to respond to the request.
Check this solution which will able to find same character string too, let me know if something missing or not right.
function indexes(source, find) {_x000D_
if (!source) {_x000D_
return [];_x000D_
}_x000D_
if (!find) {_x000D_
return source.split('').map(function(_, i) { return i; });_x000D_
}_x000D_
source = source.toLowerCase();_x000D_
find = find.toLowerCase();_x000D_
var result = [];_x000D_
var i = 0;_x000D_
while(i < source.length) {_x000D_
if (source.substring(i, i + find.length) == find)_x000D_
result.push(i++);_x000D_
else_x000D_
i++_x000D_
}_x000D_
return result;_x000D_
}_x000D_
console.log(indexes('aaaaaaaa', 'aaaaaa'))_x000D_
console.log(indexes('aeeaaaaadjfhfnaaaaadjddjaa', 'aaaa'))_x000D_
console.log(indexes('wordgoodwordgoodgoodbestword', 'wordgood'))_x000D_
console.log(indexes('I learned to play the Ukulele in Lebanon.', 'le'))
_x000D_
Thanks for all the replies. I went through all of them and came up with a function that gives the first an last index of each occurrence of the 'needle' substring . I am posting it here in case it will help someone.
Please note, it is not the same as the original request for only the beginning of each occurrence. It suits my usecase better because you don't need to keep the needle length.
function findRegexIndices(text, needle, caseSensitive){
var needleLen = needle.length,
reg = new RegExp(needle, caseSensitive ? 'gi' : 'g'),
indices = [],
result;
while ( (result = reg.exec(text)) ) {
indices.push([result.index, result.index + needleLen]);
}
return indices
}
Follow the answer of @jcubic, his solution caused a small confusion for my case
For example var result = indexes('aaaa', 'aa')
will return [0, 1, 2]
instead of [0, 2]
So I updated a bit his solution as below to match my case
function indexes(text, subText, caseSensitive) {
var _source = text;
var _find = subText;
if (caseSensitive != true) {
_source = _source.toLowerCase();
_find = _find.toLowerCase();
}
var result = [];
for (var i = 0; i < _source.length;) {
if (_source.substring(i, i + _find.length) == _find) {
result.push(i);
i += _find.length; // found a subText, skip to next position
} else {
i += 1;
}
}
return result;
}
Here's my code (using search and slice methods)
let s = "I learned to play the Ukulele in Lebanon"
let sub = 0
let matchingIndex = []
let index = s.search(/le/i)
while( index >= 0 ){
matchingIndex.push(index+sub);
sub = sub + ( s.length - s.slice( index+1 ).length )
s = s.slice( index+1 )
index = s.search(/le/i)
}
console.log(matchingIndex)
_x000D_
You sure can do this!
//make a regular expression out of your needle
var needle = 'le'
var re = new RegExp(needle,'gi');
var haystack = 'I learned to play the Ukulele';
var results = new Array();//this is the results you want
while (re.exec(haystack)){
results.push(re.lastIndex);
}
Edit: learn to spell RegExp
Also, I realized this isn't exactly what you want, as lastIndex
tells us the end of the needle not the beginning, but it's close - you could push re.lastIndex-needle.length
into the results array...
Edit: adding link
@Tim Down's answer uses the results object from RegExp.exec(), and all my Javascript resources gloss over its use (apart from giving you the matched string). So when he uses result.index
, that's some sort of unnamed Match Object. In the MDC description of exec, they actually describe this object in decent detail.
Here is a simple code snippet:
function getIndexOfSubStr(str, searchToken, preIndex, output) {
var result = str.match(searchToken);
if (result) {
output.push(result.index +preIndex);
str=str.substring(result.index+searchToken.length);
getIndexOfSubStr(str, searchToken, preIndex, output)
}
return output;
}
var str = "my name is 'xyz' and my school name is 'xyz' and my area name is 'xyz' ";
var searchToken ="my";
var preIndex = 0;
console.log(getIndexOfSubStr(str, searchToken, preIndex, []));
_x000D_
function countInString(searchFor,searchIn){
var results=0;
var a=searchIn.indexOf(searchFor)
while(a!=-1){
searchIn=searchIn.slice(a*1+searchFor.length);
results++;
a=searchIn.indexOf(searchFor);
}
return results;
}
I am a bit late to the party (by almost 10 years, 2 months), but one way for future coders is to do it using while loop and indexOf()
let haystack = "I learned to play the Ukulele in Lebanon.";
let needle = "le";
let pos = 0; // Position Ref
let result = []; // Final output of all index's.
let hayStackLower = haystack.toLowerCase();
// Loop to check all occurrences
while (hayStackLower.indexOf(needle, pos) != -1) {
result.push(hayStackLower.indexOf(needle , pos));
pos = hayStackLower.indexOf(needle , pos) + 1;
}
console.log("Final ", result); // Returns all indexes or empty array if not found
the below code will do the job for you :
function indexes(source, find) {
var result = [];
for(i=0;i<str.length; ++i) {
// If you want to search case insensitive use
// if (source.substring(i, i + find.length).toLowerCase() == find) {
if (source.substring(i, i + find.length) == find) {
result.push(i);
}
}
return result;
}
indexes("hello, how are you", "ar")
Here is an example from the MDN docs itself:
var str = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';
var regexp = /[A-E]/gi;
var matches_array = str.match(regexp);
console.log(matches_array);
// ['A', 'B', 'C', 'D', 'E', 'a', 'b', 'c', 'd', 'e']
If you just want to find the position of all matches I'd like to point you to a little hack:
var haystack = 'I learned to play the Ukulele in Lebanon.',
needle = 'le',
splitOnFound = haystack.split(needle).map(function (culm)
{
return this.pos += culm.length + needle.length
}, {pos: -needle.length}).slice(0, -1); // {pos: ...} – Object wich is used as this
console.log(splitOnFound);
_x000D_
It might not be applikable if you have a RegExp with variable length but for some it might be helpful.
This is case sensitive. For case insensitivity use String.toLowerCase
function before.
Here is regex free version:
function indexes(source, find) {
if (!source) {
return [];
}
// if find is empty string return all indexes.
if (!find) {
// or shorter arrow function:
// return source.split('').map((_,i) => i);
return source.split('').map(function(_, i) { return i; });
}
var result = [];
for (i = 0; i < source.length; ++i) {
// If you want to search case insensitive use
// if (source.substring(i, i + find.length).toLowerCase() == find) {
if (source.substring(i, i + find.length) == find) {
result.push(i);
}
}
return result;
}
indexes("I learned to play the Ukulele in Lebanon.", "le")
EDIT: and if you want to match strings like 'aaaa' and 'aa' to find [0, 2] use this version:
function indexes(source, find) {
if (!source) {
return [];
}
if (!find) {
return source.split('').map(function(_, i) { return i; });
}
var result = [];
var i = 0;
while(i < source.length) {
if (source.substring(i, i + find.length) == find) {
result.push(i);
i += find.length;
} else {
i++;
}
}
return result;
}
One liner using String.protype.matchAll
(ES2020):
[...sourceStr.matchAll(new RegExp(searchStr, 'gi'))].map(a => a.index)
Using your values:
const sourceStr = 'I learned to play the Ukulele in Lebanon.';
const searchStr = 'le';
const indexes = [...sourceStr.matchAll(new RegExp(searchStr, 'gi'))].map(a => a.index);
console.log(indexes); // [2, 25, 27, 33]
If you're worried about doing a spread and a map()
in one line, I ran it with a for...of
loop for a million iterations (using your strings). The one liner averages 1420ms while the for...of
averages 1150ms on my machine. That's not an insignificant difference, but the one liner will work fine if you're only doing a handful of matches.
Source: Stackoverflow.com