[sql-server] SQL Server - find nth occurrence in a string

I have a table column that contains values such as abc_1_2_3_4.gif or zzz_12_3_3_45.gif etc.

I want to find the index of each underscore _ in the above values. There will only ever be four underscores but given that they can be in any position in the string, how can I achieve this?

I've tried the substring and charindex function, but I can only reliably get hold of the first one. Any ideas?

This question is related to sql-server

The answer is


DECLARE @x VARCHAR(32) = 'MS-SQL-Server';

-- Work on a trimmed copy so the positions and the SUBSTRING calls refer to the same string.
DECLARE @s VARCHAR(32) = LTRIM(RTRIM(@x));

-- Positions of the first and second '-'.
-- This sample assumes the input contains exactly two dashes.
DECLARE @first  INT = CHARINDEX('-', @s);
DECLARE @second INT = CHARINDEX('-', @s, @first + 1);

SELECT
    SUBSTRING(@s, 1, @first - 1)                 AS A,  -- text before the first dash
    SUBSTRING(@s, @first + 1, @second - @first - 1) AS B,  -- text between the two dashes
    SUBSTRING(@s, @second + 1, LEN(@s))          AS C;  -- text after the second dash


A   B   C
MS  SQL Server

You can nest the same function, using the position of the previous match + 1 as the starting position:

-- Position of the second '_': the inner call finds the first one, and +1 starts the outer search just past it.
CHARINDEX('_', [TEXT], CHARINDEX('_', [TEXT]) + 1)

Here the +1 moves the search just past the previous match; nest one more call (each with its own +1) for every further occurrence you want to find.


You can look for the four underscore in this way:

-- Sample data: file names that each contain four underscores.
create table #test
( t varchar(50) );

insert into #test (t) values
('abc_1_2_3_4.gif'),
('zzz_12_3_3_45.gif');

declare @t varchar(50);  -- file name of the current row
declare @t1 int;         -- position of the 1st underscore
declare @t2 int;         -- position of the 2nd underscore
declare @t3 int;         -- position of the 3rd underscore
declare @t4 int;         -- position of the 4th underscore

declare t_cursor cursor local fast_forward
    for select t from #test;

open t_cursor;
fetch next from t_cursor into @t;

-- Iterate over every row of the table.
while @@fetch_status = 0
begin
    -- Each search starts one character past the previous match.
    -- Assumes four underscores per value: after a miss (0) the next search would restart at position 1.
    set @t1 = charindex('_', @t);
    set @t2 = charindex('_', @t, @t1 + 1);
    set @t3 = charindex('_', @t, @t2 + 1);
    set @t4 = charindex('_', @t, @t3 + 1);

    select @t as t, @t1 as pos1, @t2 as pos2, @t3 as pos3, @t4 as pos4;

    fetch next from t_cursor into @t;
end

-- Release the cursor so the script can be re-run in the same session.
close t_cursor;
deallocate t_cursor;

you can test it here.

Or in this simple way:

-- Set-based version: each CROSS APPLY finds the next underscore,
-- starting one character past the position found by the previous step.
select
  p1.pos as first,
  p2.pos as second,
  p3.pos as third,
  p4.pos as fourth
from
  #test
  cross apply ( select charindex( '_', t ) as pos ) p1
  cross apply ( select charindex( '_', t, p1.pos + 1 ) as pos ) p2
  cross apply ( select charindex( '_', t, p2.pos + 1 ) as pos ) p3
  cross apply ( select charindex( '_', t, p3.pos + 1 ) as pos ) p4

I've used a function to grab the "nth" element from a delimited string field with great success. Like mentioned above, it's not a "fast" way of dealing with things but it sure as heck is convenient.

-- Returns the @index-th (1-based) element of @delimited, split on @delimiter.
-- Returns NULL when no element exists at that index.
-- Limitation: the input is placed into XML without escaping, so values containing
-- XML-special characters such as '&' or '<' make the XML conversion fail.
create function GetArrayIndex(@delimited nvarchar(max), @index int,  @delimiter nvarchar(100) = ',')  returns nvarchar(max)
as
begin
    declare @xml xml, @result nvarchar(max)

    -- Turn 'a,b,c' into <root><r>a</r><r>b</r><r>c</r></root>.
    set @xml = N'<root><r>' + replace(@delimited, @delimiter, N'</r><r>') + N'</r></root>'

    -- Read the element as nvarchar(max): reading it as varchar(max) would drop
    -- characters outside the database code page before they reach @result.
    select @result = r.value('.', 'nvarchar(max)')
    from @xml.nodes('//root/r[sql:variable("@index")]') as records(r)

    return @result
end

DECLARE @str VARCHAR(100) = '1,2  , 3,   4,   5,6';

-- Build the <X>..</X> document once instead of re-casting it for every column.
DECLARE @doc XML = CAST('<X>' + REPLACE(@str, ',', '</X><X>') + '</X>' AS XML);

-- One column per element position; positions past the last element come back as ''.
SELECT COALESCE(LTRIM(@doc.value('(/X)[1]', 'varchar(128)')), ''),
       COALESCE(LTRIM(@doc.value('(/X)[2]', 'varchar(128)')), ''),
       COALESCE(LTRIM(@doc.value('(/X)[3]', 'varchar(128)')), ''),
       COALESCE(LTRIM(@doc.value('(/X)[4]', 'varchar(128)')), ''),
       COALESCE(LTRIM(@doc.value('(/X)[5]', 'varchar(128)')), ''),
       COALESCE(LTRIM(@doc.value('(/X)[6]', 'varchar(128)')), ''),
       COALESCE(LTRIM(@doc.value('(/X)[7]', 'varchar(128)')), ''),
       COALESCE(LTRIM(@doc.value('(/X)[8]', 'varchar(128)')), ''),
       COALESCE(LTRIM(@doc.value('(/X)[9]', 'varchar(128)')), '')

I was toying with a faster way to do this than simply iterating through the string.

CREATE FUNCTION [ssf_GetNthSeparatorPosition] ( @TargetString VARCHAR(MAX)
                                              , @Sep VARCHAR(25)
                                              , @n INTEGER )
RETURNS INTEGER
/****************************************************************************************
--#############################################################################
-- Returns the 1-based position of the Nth occurrence of the character sequence
-- @Sep in @TargetString.
-- Returns 0 when @Sep is empty, when @n is less than 1, or when there are fewer
-- than @n occurrences. Returns NULL when @TargetString or @Sep is NULL.
--                                     1234567890123456789
-- Declare @thatString varchar(max) = 'hi,there,jay,yo'
  Select dbo.ssf_GetNthSeparatorPosition(@thatString, ',', 3) --would return 13
--############################################################################


****************************************************************************************/
AS
    BEGIN
        -- LEN() ignores trailing blanks, so a separator such as ' ' would measure 0.
        -- Appending a sentinel character and subtracting 1 gives the true length.
        DECLARE @LenSep INTEGER = LEN(@Sep + 'x') - 1
        DECLARE @LenTarget BIGINT = LEN(@TargetString + 'x') - 1

        -- Cheap pre-check: CHARINDEX is 0 for an empty or absent separator
        -- and NULL for NULL input, so the tally scan is skipped in those cases.
        DECLARE @CurPos INTEGER = CHARINDEX(@Sep, @TargetString)

        IF @CurPos > 0
            BEGIN

               SELECT @CurPos = 0
              ;WITH lv0 AS (SELECT 0 g UNION ALL SELECT 0)
                            ,lv1 AS (SELECT 0 g FROM lv0 a CROSS JOIN lv0 b) -- 4
                            ,lv2 AS (SELECT 0 g FROM lv1 a CROSS JOIN lv1 b) -- 16
                            ,lv3 AS (SELECT 0 g FROM lv2 a CROSS JOIN lv2 b) -- 256
                            ,lv4 AS (SELECT 0 g FROM lv3 a CROSS JOIN lv3 b) -- 65,536
                            ,lv5 AS (SELECT 0 g FROM lv4 a CROSS JOIN lv4 b) -- 4,294,967,296
                            ,Tally (n) AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM lv5),
                        results
                          -- One row per position where the separator starts,
                          -- numbered 1, 2, 3, ... in left-to-right order.
                          AS ( SELECT n AS Nth
                                ,   ROW_NUMBER() OVER ( ORDER BY n ) AS Position
                                FROM Tally t
                                WHERE n BETWEEN 1
                                        AND     @LenTarget - @LenSep + 1
                                    AND SUBSTRING(@TargetString, n, @LenSep) = @Sep)
                    -- No row matches when @n is out of range, leaving @CurPos at 0.
                    SELECT @CurPos = Nth
                        FROM results
                        WHERE results.Position = @n

            END
        RETURN @CurPos

    END

GO

declare @a nvarchar(50)='Enter Your string '   -- string to search
declare @character char='e'                    -- character to look for
declare @nthoccurence int = 2                  -- which occurrence to report
declare @i int = 1                             -- loop counter
declare @j int =0                              -- position of the most recent match

-- Number of occurrences = length difference after removing the character.
-- A sentinel is appended because LEN() ignores trailing blanks.
declare @count int = len(@a + N'x') - len(replace(@a, @character, '') + N'x')

if (@count >= @nthoccurence)
begin
        -- Jump from match to match; after @nthoccurence hops @j is the answer.
        while (@i <= @nthoccurence)
        begin
            set @j = charindex(@character, @a, @j + 1)
            set @i = @i + 1
        end
        print @j
end
else
    print 'you have only '+convert(nvarchar(11), @count)+' occurrences of '+@character

You can use the CHARINDEX and specify the starting location:

DECLARE @x VARCHAR(32) = 'MS-SQL-Server';

-- Trim once so that every index below refers to the same string.
DECLARE @s VARCHAR(32) = LTRIM(RTRIM(@x));

SELECT
  STUFF(STUFF(@x,3 , 0, '/'), 8, 0, '/') InsertString
  ,CHARINDEX('-', @s) FirstIndexOf
  -- Each further occurrence starts its search one character past the previous match.
  ,CHARINDEX('-', @s, CHARINDEX('-', @s) + 1) SecondIndexOf
  ,CHARINDEX('-', @s, CHARINDEX('-', @s, CHARINDEX('-', @s) + 1) + 1) ThirdIndexOf
  -- CHARINDEX on the reversed string is an offset from the END of the string;
  -- convert it back to a position from the start. Yields 0 when there is no '-'.
  ,ISNULL(LEN(@s) - NULLIF(CHARINDEX('-', REVERSE(@s)), 0) + 1, 0) LastIndexOf;
GO

-- Finds the position of the second 'O' in @VAR (0 when there is no second one).
DECLARE @LEN INT
DECLARE @VAR VARCHAR(20)
SET @VAR = 'HELLO WORLD'
SET @LEN = LEN(@VAR)

-- Position of the first match.
DECLARE @FIRST INT = PATINDEX('%O%', @VAR)
-- Position of the next match, relative to the text after the first match.
-- @LEN as the length guarantees the whole remainder is searched.
DECLARE @NEXT INT = PATINDEX('%O%', SUBSTRING(@VAR, @FIRST + 1, @LEN))

SELECT CASE
           WHEN @FIRST > 0 AND @NEXT > 0 THEN @FIRST + @NEXT
           ELSE 0
       END AS SecondIndexOf

You can use the following function to split the values by a delimiter. It'll return a table and to find the nth occurrence just make a select on it! Or change it a little for it to return what you need instead of the table.

-- Splits @RowData on the delimiter @SplitOn and returns one row per element.
-- Id is the 1-based position of the element; Data is the element with leading
-- and trailing blanks removed. An empty or NULL delimiter yields a single row.
CREATE FUNCTION dbo.Split
(
    @RowData nvarchar(2000),
    @SplitOn nvarchar(5)
)
RETURNS @RtnValue table
(
    Id int identity(1,1),
    Data nvarchar(100)
)
AS
BEGIN
    -- True delimiter length. LEN() ignores trailing blanks, so a sentinel is
    -- appended to keep a blank delimiter from measuring 0.
    Declare @SepLen int = Len(@SplitOn + N'x') - 1
    Declare @Pos int = Charindex(@SplitOn, @RowData)

    While (@Pos > 0)
    Begin
        -- Emit everything before the delimiter ...
        Insert Into @RtnValue (data)
        Select
            Data = ltrim(rtrim(Substring(@RowData, 1, @Pos - 1)))

        -- ... then drop it together with the WHOLE delimiter, not just one character.
        -- 2000 is the declared maximum length of @RowData, so the full remainder is kept.
        Set @RowData = Substring(@RowData, @Pos + @SepLen, 2000)
        Set @Pos = Charindex(@SplitOn, @RowData)
    End

    -- Whatever is left after the last delimiter is the final element.
    Insert Into @RtnValue (data)
    Select Data = ltrim(rtrim(@RowData))

    Return
END

I did this creating several separate custom functions, one for each position of the searched character i.e. 2nd, 3rd:

-- Returns the position of the 2nd occurrence of @SEARCHCHAR in @SEARCHSTRING,
-- or 0 when there are fewer than two occurrences.
CREATE FUNCTION [dbo].[fnCHARPOS2]
(@SEARCHCHAR VARCHAR(255),
@SEARCHSTRING VARCHAR(255))
RETURNS INT
AS
BEGIN
 -- The inner call finds the first occurrence; the outer search starts one character after it.
 RETURN CHARINDEX(@SEARCHCHAR, @SEARCHSTRING, CHARINDEX(@SEARCHCHAR, @SEARCHSTRING, 0) + 1);
END
GO

-- Returns the position of the 3rd occurrence of @SEARCHCHAR in @SEARCHSTRING,
-- or 0 when there are fewer than three occurrences.
CREATE FUNCTION [dbo].[fnCHARPOS3]
(@SEARCHCHAR VARCHAR(255),
@SEARCHSTRING VARCHAR(255))
RETURNS INT
AS
BEGIN
 DECLARE @POS INT = CHARINDEX(@SEARCHCHAR, @SEARCHSTRING);

 -- Only keep searching while the previous occurrence was found. Searching again
 -- after a miss (0 + 1 = start of string) would wrongly return the first occurrence.
 IF @POS > 0 SET @POS = CHARINDEX(@SEARCHCHAR, @SEARCHSTRING, @POS + 1);
 IF @POS > 0 SET @POS = CHARINDEX(@SEARCHCHAR, @SEARCHSTRING, @POS + 1);

 RETURN @POS;
END
GO

You can then pass in as a parameter the character you are searching for and the string you are searching in:

So if you were searching for 'f' and wanted to know the positions of the first 3 occurrences:

-- [database], tablename and columnname are placeholders for your own names.
-- DATABASE is a reserved word in T-SQL, so the placeholder is bracketed.
select
    charindex('f', tablename.columnname)                        as first_pos,   -- 1st occurrence needs no helper
    [database].dbo.fnCHARPOS2('f', tablename.columnname)        as second_pos,
    [database].dbo.fnCHARPOS3('f', tablename.columnname)        as third_pos
from tablename

It worked for me!


Try this

-- Returns the position of the @occurrenceIndex-th occurrence of @expressionToFind
-- in @expressionToSearch, searching from @startLocation. Successive occurrences
-- do not overlap. Returns 0 when there are not that many occurrences.
CREATE FUNCTION [dbo].[CHARINDEX2] (
    @expressionToFind VARCHAR(MAX),
    @expressionToSearch VARCHAR(MAX),
    @occurrenceIndex INT,
    @startLocation INT = 0
)
RETURNS INT
AS BEGIN

-- Argument checks. The CAST of a message to INT fails at run time on purpose,
-- which surfaces the message as a conversion error to the caller.
IF @occurrenceIndex < 1 BEGIN
    RETURN CAST('The argument @occurrenceIndex must be a positive integer.' AS INT)
END

IF @startLocation < 0 BEGIN
    RETURN CAST('The argument @startLocation must be a non negative integer.' AS INT)
END

-- True length of the search expression. LEN() ignores trailing blanks, so a
-- search for ' ' would measure 0 and the loop below would never advance.
DECLARE @targetLength INT = LEN(@expressionToFind + 'x') - 1

-- Occurrences still to skip after the first one has been located.
DECLARE @remaining INT = @occurrenceIndex - 1

DECLARE @returnIndex INT = CHARINDEX(@expressionToFind, @expressionToSearch, @startLocation)

-- Hop to the next occurrence, starting just past the end of the current one,
-- until enough were skipped or the search comes back empty (0).
WHILE (@remaining > 0 AND @returnIndex > 0) BEGIN
    SET @returnIndex = CHARINDEX(@expressionToFind, @expressionToSearch, @returnIndex + @targetLength);
    SET @remaining -= 1
END

RETURN @returnIndex

END
GO

You can try peeling the variable/array, assuming distinctness in your list

declare @array table   ----table of values
(
    id int identity(1,1)
    ,value nvarchar(max)
)
DECLARE @VALUE NVARCHAR(MAX)='val1_val2_val3_val4_val5_val6_val7'----string array
DECLARE @CURVAL NVARCHAR(MAX)     ---current value
DECLARE @DELIM NVARCHAR(1)='_'    ---delimiter
DECLARE @BREAKPT INT              ---current index of the delimiter

WHILE 1 = 1                       ---runs until the explicit BREAK below
    BEGIN
        SET @BREAKPT=CHARINDEX(@DELIM,@VALUE)   ---set the current index
        ---
        If @BREAKPT<> 0                          ---index at 0 breaks the loop
            begin
                SET @CURVAL=SUBSTRING(@VALUE,1,@BREAKPT-1)                  ---current value
                ---cut off only the leading value and its delimiter by position;
                ---REPLACE would also delete identical text further along the string.
                ---DATALENGTH (bytes) is never smaller than the character count,
                ---so the whole remainder is kept.
                set @VALUE=SUBSTRING(@VALUE,@BREAKPT+1,DATALENGTH(@VALUE))
                insert into @array(value)                                   ---insert data
                select @CURVAL
            end
        else
            begin
                SET @CURVAL=@VALUE                                          ---current value now last value
                insert into @array(value)                                   ---insert data
                select @CURVAL
                break                                                       ---break loop
            end
    end

select id, value from @array    ---find nth occurrence given the id

MySQL supports a SUBSTRING_INDEX function that works with the nth occurrence of a delimiter in a string (it returns the part of the string before that occurrence). A similar user-defined function could be written for SQL Server to achieve this. Example in the link.

Alternatively, you could call the CHARINDEX function repeatedly to report the location of each _, starting each search at the position of the previously found instance + 1, until 0 is returned.

Edit: NM Charindex is the correct function


I decided to use a recursive function because for me it was easier to follow the logic. Note that SQL Server has a default function recursion limit of 32, so this is only good for smaller workloads.

create function dbo._charindex_nth (
  @FindThis varchar(8000),
  @InThis varchar(max),
  @StartFrom int,
  @NthOccurence tinyint
)
returns bigint
as
begin
  /*
  Recursive helper for dbo.charindex_nth.
  Returns the position of the @NthOccurence-th occurrence of @FindThis in @InThis,
  searching from @StartFrom. Returns 0 when the occurrence number or the start
  position is missing or not positive, or when the occurrence is not found.

  Who   When    What
  PJR   160421  Initial
  */

  -- Guard: nothing to search for. A NULL @StartFrom also lands here, which is
  -- how a miss in an outer call ends the recursion.
  if isnull(@NthOccurence, 0) <= 0 or isnull(@StartFrom, 0) <= 0
    return 0

  declare @Hit bigint = charindex(@FindThis, @InThis, @StartFrom)

  -- Base case: the occurrence found from here is the one asked for.
  if @NthOccurence = 1
    return @Hit

  -- Otherwise look for one occurrence fewer, starting just past this hit.
  -- nullif turns a miss (0) into NULL so the guard above returns 0.
  return dbo._charindex_nth(@FindThis, @InThis, nullif(@Hit, 0) + 1, @NthOccurence - 1)
end

create function dbo.charindex_nth (
  @FindThis varchar(8000),
  @InThis varchar(max),
  @NthOccurence tinyint
)
returns bigint
as
begin
  /*
  Public entry point: position of the @NthOccurence-th occurrence of @FindThis
  in @InThis. Delegates to dbo._charindex_nth, starting at character 1.

  Who   When    What
  PJR   160421  Initial
  */

  declare @Position bigint = dbo._charindex_nth(@FindThis, @InThis, 1, @NthOccurence)

  return @Position
end

-- Example: positions of all four underscores in one file name.
declare @val varchar(max) = 'zzz_12_3_3_45.gif'

select
    Underscore1 = dbo.charindex_nth('_', @val, 1),
    Underscore2 = dbo.charindex_nth('_', @val, 2),
    Underscore3 = dbo.charindex_nth('_', @val, 3),
    Underscore4 = dbo.charindex_nth('_', @val, 4)

Inspired by Alex K's reply ("One way (2k8)"), I have created a script for a token function for SQL Server that returns a specific token from a string. I needed this for refactoring an SSIS package to T-SQL without having to implement Alex's solution manually a number of times. My function has one disadvantage: it returns the token value as a table (one column, one row) instead of as a varchar value. If anyone has a solution for this, please let me know.

-- Drop the previous version only when it exists, so the script also runs on a fresh database.
IF OBJECT_ID(N'[RDW].[token]', N'IF') IS NOT NULL
    DROP FUNCTION [RDW].[token]
GO

-- Returns the token at 0-based position @returnIndex of @string, split on @split,
-- as a one-column table (column: token). No row is returned when the index is
-- past the last token.
create function [RDW].[token] (@string varchar(8000), @split varchar(50), @returnIndex int)
returns table
as
    -- Recursive walk over the delimiters. Each row describes one token:
    --   starts  = first character of the token
    --   pos     = position of the delimiter that ends it (0 for the last token)
    --   [index] = 0-based token number
    return with T(img, starts, pos, [index]) as (
        select @string, 1, charindex(@split, @string), 0
        union all
        -- Step over the WHOLE delimiter. len(@split + 'x') - 1 is its true length,
        -- because len() alone ignores trailing blanks.
        select @string,
               pos + len(@split + 'x') - 1,
               charindex(@split, @string, pos + len(@split + 'x') - 1),
               [index]+1
        from T
        where pos > 0
    )
    select substring(img, starts, case when pos > 0 then pos - starts else len(img) end) token
    from T
    where [index] = @returnIndex
GO

-- Sample rows.
declare @T as table (pic_name varchar(100));
insert into @T (pic_name) values ('abc_1_2_3_4.gif'), ('zzz_12_3_3_45.gif');

-- Each APPLY step yields the next underscore position as D, or NULL once no
-- further underscore exists; a NULL then carries through the remaining steps.
select src.pic_name, p1.D, p2.D, p3.D, p4.D
from @T as src
cross apply (values (nullif(charindex('_', src.pic_name), 0))) as p1 (D)
cross apply (values (nullif(charindex('_', src.pic_name, p1.D + 1), 0))) as p2 (D)
cross apply (values (nullif(charindex('_', src.pic_name, p2.D + 1), 0))) as p3 (D)
cross apply (values (nullif(charindex('_', src.pic_name, p3.D + 1), 0))) as p4 (D)

A simple sample to do this with xml conversion:

-- Step by step: the raw string, the same string rewritten as XML text,
-- the second element as XML, and the second element as a plain value.
SELECT 'A|B|C' AS source
     , concat('<x>', REPLACE('A|B|C', '|', '</x><x>'), '</x>') AS xmltext
     , cast(concat('<x>', REPLACE('A|B|C', '|', '</x><x>'), '</x>') as xml).query('/x[2]') AS xmlelement
       -- Give the target type an explicit length so the value cannot be cut short by a default length.
     , cast(concat('<x>', REPLACE('A|B|C', '|', '</x><x>'), '</x>') as xml).value('/x[2]', 'varchar(10)') AS result;

And here a translation for your sample:

-- Second '_'-separated element of each file name, as an XML element and as text.
-- The XML document is built once per row and reused for both columns.
SELECT tmp.gifname
     , parsed.doc.query('/x[2]') AS xmlelement
     , parsed.doc.value('/x[2]', 'varchar(10)') AS result
FROM (VALUES ('abc_1_2_3_4.gif'), ('zzz_12_3_3_45.gif')) AS tmp (gifname)
CROSS APPLY (
    SELECT CAST(CONCAT('<x>', REPLACE(tmp.gifname, '_', '</x><x>'), '</x>') AS xml) AS doc
) AS parsed