[c] Split string with delimiters in C

How do I write a function to split and return an array for a string with delimiters in the C programming language?

char* str = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
str_split(str,',');

This question is related to c string split

The answer is


This optimized method create (or update an existing) array of pointers in *result and returns the number of elements in *count.

Use "max" to indicate the maximum number of strings you expect (when you specify an existing array or any other reaseon), else set it to 0

To compare against a list of delimiters, define delim as a char* and replace the line:

if (str[i]==delim) {

with the two following lines:

 char *c=delim; while(*c && *c!=str[i]) c++;
 if (*c) {

Enjoy

#include <stdlib.h>
#include <string.h>

char **split(char *str, size_t len, char delim, char ***result, unsigned long *count, unsigned long max) {
  size_t i;
  char **_result;

  // there is at least one string returned
  *count=1;

  _result= *result;

  // when the result array is specified, fill it during the first pass
  if (_result) {
    _result[0]=str;
  }

  // scan the string for delimiter, up to specified length
  for (i=0; i<len; ++i) {

    // to compare against a list of delimiters,
    // define delim as a string and replace 
    // the next line:
    //     if (str[i]==delim) {
    //
    // with the two following lines:
    //     char *c=delim; while(*c && *c!=str[i]) c++;
    //     if (*c) {
    //       
    if (str[i]==delim) {

      // replace delimiter with zero
      str[i]=0;

      // when result array is specified, fill it during the first pass
      if (_result) {
        _result[*count]=str+i+1;
      }

      // increment count for each separator found
      ++(*count);

      // if max is specified, dont go further
      if (max && *count==max)  {
        break;
      }

    }
  }

  // when result array is specified, we are done here
  if (_result) {
    return _result;
  }

  // else allocate memory for result
  // and fill the result array                                                                                    

  *result=malloc((*count)*sizeof(char*));
  if (!*result) {
    return NULL;
  }
  _result=*result;

  // add first string to result
  _result[0]=str;

  // if theres more strings
  for (i=1; i<*count; ++i) {

    // find next string
    while(*str) ++str;
    ++str;

    // add next string to result
    _result[i]=str;

  }

  return _result;
}  

Usage example:

#include <stdio.h>

int main(int argc, char **argv) {
  char *str="JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
  char **result=malloc(6*sizeof(char*));
  char **result2=0;
  unsigned long count;
  unsigned long count2;
  unsigned long i;

  split(strdup(str),strlen(str),',',&result,&count,6);
  split(strdup(str),strlen(str),',',&result2,&count2,0);

  if (result)
  for (i=0; i<count; ++i) {
    printf("%s\n",result[i]);
  }

  printf("\n");

  if (result2)
  for (i=0; i<count2; ++i) {
    printf("%s\n", result2[i]);
  }

  return 0;

}

Explode & implode - initial string remains intact, dynamic memory allocation

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

typedef struct
{
    uintptr_t   ptr;
    int         size;
} token_t;

int explode(char *str, int slen, const char *delimiter, token_t **tokens)
{
    int i = 0, c1 = 0, c2 = 0;

    for(i = 0; i <= slen; i++)
    {
            if(str[i] == *delimiter)
            {
                c1++;
            }
    }

    if(c1 == 0)
    {
            return -1;
    }

    *tokens = (token_t*)calloc((c1 + 1), sizeof(token_t));
    ((*tokens)[c2]).ptr = (uintptr_t)str;

    i = 0; 
    while(i <= slen)
    {
        if((str[i] == *delimiter) || (i == slen))
        {
                ((*tokens)[c2]).size = (int)((uintptr_t)&(str[i]) - (uintptr_t)(((*tokens)[c2]).ptr));
                if(i < slen)
                {
                    c2++;
                    ((*tokens)[c2]).ptr = (uintptr_t)&(str[i + 1]);
                }
        }
        i++;
    }
    return (c1 + 1);
}

char* implode(token_t *tokens, int size, const char *delimiter)
{
    int     i, len = 0;
    char    *str;

    for(i = 0; i < len; i++)
    {
        len += tokens[i].size + 1;
    }

    str = (char*)calloc(len, sizeof(char));

    len = 0;
    for(i = 0; i < size; i++)
    {
        memcpy((void*)&str[len], (void*)tokens[i].ptr, tokens[i].size);
        len += tokens[i].size;
        str[(len++)] = *delimiter;
    }

    str[len - 1] = '\0';

    return str;
}

Usage:

int main(int argc, char **argv)
{
    int         i, c;
    char        *exp = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
    token_t     *tokens;
    char        *imp;

    printf("%s\n", exp);

    if((c = explode(exp, strlen(exp), ",", &tokens)) > 0)
    {
        imp = implode(tokens, c, ",");
        printf("%s\n", imp);

        for(i = 0; i < c; i++)
        {
            printf("%.*s, %d\n", tokens[i].size, (char*)tokens[i].ptr, tokens[i].size);
        }
    }

    free((void*)tokens);
    free((void*)imp);
    return 0;
}

String tokenizer this code should put you in the right direction.

int main(void) {
  char st[] ="Where there is will, there is a way.";
  char *ch;
  ch = strtok(st, " ");
  while (ch != NULL) {
  printf("%s\n", ch);
  ch = strtok(NULL, " ,");
  }
  getch();
  return 0;
}

If you are willing to use an external library, I can't recommend bstrlib enough. It takes a little extra setup, but is easier to use in the long run.

For example, split the string below, one first creates a bstring with the bfromcstr() call. (A bstring is a wrapper around a char buffer). Next, split the string on commas, saving the result in a struct bstrList, which has fields qty and an array entry, which is an array of bstrings.

bstrlib has many other functions to operate on bstrings

Easy as pie...

#include "bstrlib.h"
#include <stdio.h>
int main() {
  int i;
  char *tmp = "Hello,World,sak";
  bstring bstr = bfromcstr(tmp);
  struct bstrList *blist = bsplit(bstr, ',');
  printf("num %d\n", blist->qty);
  for(i=0;i<blist->qty;i++) {
    printf("%d: %s\n", i, bstr2cstr(blist->entry[i], '_'));
  }

}

My code (tested):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int dtmsplit(char *str, const char *delim, char ***array, int *length ) {
  int i=0;
  char *token;
  char **res = (char **) malloc(0 * sizeof(char *));

  /* get the first token */
   token = strtok(str, delim);
   while( token != NULL ) 
   {
        res = (char **) realloc(res, (i + 1) * sizeof(char *));
        res[i] = token;
        i++;
      token = strtok(NULL, delim);
   }
   *array = res;
   *length = i;
  return 1;
}

int main()
{
    int i;
    int c = 0;
    char **arr = NULL;

    int count =0;

    char str[80] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
    c = dtmsplit(str, ",", &arr, &count);
    printf("Found %d tokens.\n", count);

    for (i = 0; i < count; i++)
        printf("string #%d: %s\n", i, arr[i]);

   return(0);
}

Result:

Found 12 tokens.
string #0: JAN
string #1: FEB
string #2: MAR
string #3: APR
string #4: MAY
string #5: JUN
string #6: JUL
string #7: AUG
string #8: SEP
string #9: OCT
string #10: NOV
string #11: DEC

Two issues surrounding this question are memory management and thread safety. As you can see from the numerous posts, this isn't an easy task to accomplish seamlessly in C. I desired a solution that is:

  • Thread safe. (strtok is not thread safe)
  • Does not employ malloc or any of it's derivatives (to avoid memory management issues)
  • Checks array bounds on the individual fields (to avoid segment faults on unknown data)
  • Works with multi-byte field separators (utf-8)
  • ignores extra fields in the input
  • provides soft error routine for invalid field lengths

The solution I came up meets all of these criteria. It's probably a little more work to setup than some other solutions posted here, but I think that in practice, the extra work is worth it in order to avoid the common pitfalls of other solutions.

#include <stdio.h>
#include <string.h>

struct splitFieldType {
    char *field;
    int   maxLength;
};

typedef struct splitFieldType splitField;

int strsplit(splitField *fields, int expected, const char *input, const char *fieldSeparator, void (*softError)(int fieldNumber,int expected,int actual))  {
    int i;
    int fieldSeparatorLen=strlen(fieldSeparator);
    const char *tNext, *tLast=input;

    for (i=0; i<expected && (tNext=strstr(tLast, fieldSeparator))!=NULL; ++i) {
        int len=tNext-tLast;
        if (len>=fields[i].maxLength) {
            softError(i,fields[i].maxLength-1,len);
            len=fields[i].maxLength-1;
        }
        fields[i].field[len]=0;
        strncpy(fields[i].field,tLast,len);
        tLast=tNext+fieldSeparatorLen;
    }
    if (i<expected) {
        if (strlen(tLast)>fields[i].maxLength) {
            softError(i,fields[i].maxLength,strlen(tLast));
        } else {
            strcpy(fields[i].field,tLast);
        }
        return i+1;
    } else {
        return i;
    }
}


void monthSplitSoftError(int fieldNumber, int expected, int actual) {
    fprintf(stderr,"monthSplit: input field #%d is %d bytes, expected %d bytes\n",fieldNumber+1,actual,expected);
}


int main() {
  const char *fieldSeparator=",";
  const char *input="JAN,FEB,MAR,APRI,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,FOO,BAR";

  struct monthFieldsType {
    char field1[4];
    char field2[4];
    char field3[4];
    char field4[4];
    char field5[4];
    char field6[4];
    char field7[4];
    char field8[4];
    char field9[4];
    char field10[4];
    char field11[4];
    char field12[4];
  } monthFields;

  splitField inputFields[12] = {
    {monthFields.field1,  sizeof(monthFields.field1)},
    {monthFields.field2,  sizeof(monthFields.field2)},
    {monthFields.field3,  sizeof(monthFields.field3)},
    {monthFields.field4,  sizeof(monthFields.field4)},
    {monthFields.field5,  sizeof(monthFields.field5)},
    {monthFields.field6,  sizeof(monthFields.field6)},
    {monthFields.field7,  sizeof(monthFields.field7)},
    {monthFields.field8,  sizeof(monthFields.field8)},
    {monthFields.field9,  sizeof(monthFields.field9)},
    {monthFields.field10, sizeof(monthFields.field10)},
    {monthFields.field11, sizeof(monthFields.field11)},
    {monthFields.field12, sizeof(monthFields.field12)}
  };

  int expected=sizeof(inputFields)/sizeof(splitField);

  printf("input data: %s\n", input);
  printf("expecting %d fields\n",expected);

  int ct=strsplit(inputFields, expected, input, fieldSeparator, monthSplitSoftError);

  if (ct!=expected) {
    printf("string split %d fields, expected %d\n", ct,expected);
  }

  for (int i=0;i<expected;++i) {
    printf("field %d: %s\n",i+1,inputFields[i].field);
  }

  printf("\n");
  printf("Direct structure access, field 10: %s", monthFields.field10);
}

Below is an example compile and output. Note that in my example, I purposefully spelled out "APRIL" so that you can see how the soft error works.

$ gcc strsplitExample.c && ./a.out
input data: JAN,FEB,MAR,APRIL,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,FOO,BAR
expecting 12 fields
monthSplit: input field #4 is 5 bytes, expected 3 bytes
field 1: JAN
field 2: FEB
field 3: MAR
field 4: APR
field 5: MAY
field 6: JUN
field 7: JUL
field 8: AUG
field 9: SEP
field 10: OCT
field 11: NOV
field 12: DEC

Direct structure access, field 10: OCT

Enjoy!


Here is another implementation that will operate safely to tokenize a string-literal matching the prototype requested in the question returning an allocated pointer-to-pointer to char (e.g. char **). The delimiter string can contain multiple characters, and the input string can contain any number of tokens. All allocations and reallocations are handled by malloc or realloc without POSIX strdup.

The initial number of pointers allocated is controlled by the NPTRS constant and the only limitation is that it be greater than zero. The char ** returned contains a sentinel NULL after the last token similar to *argv[] and in the form usable by execv, execvp and execve.

As with strtok() multiple sequential delimiters are treated as a single delimiter, so "JAN,FEB,MAR,APR,MAY,,,JUN,JUL,AUG,SEP,OCT,NOV,DEC" will be parsed as if only a single ',' separates "MAY,JUN".

The function below is commented in-line and a short main() was added splitting the months. The initial number of pointers allocated was set at 2 to force three reallocation during tokenizing the input string:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NPTRS 2     /* initial number of pointers to allocate (must be > 0) */

/* split src into tokens with sentinel NULL after last token.
 * return allocated pointer-to-pointer with sentinel NULL on success,
 * or NULL on failure to allocate initial block of pointers. The number
 * of allocated pointers are doubled each time reallocation required.
 */
char **strsplit (const char *src, const char *delim)
{
    int i = 0, in = 0, nptrs = NPTRS;       /* index, in/out flag, ptr count */
    char **dest = NULL;                     /* ptr-to-ptr to allocate/fill */
    const char *p = src, *ep = p;           /* pointer and end-pointer */

    /* allocate/validate nptrs pointers for dest */
    if (!(dest = malloc (nptrs * sizeof *dest))) {
        perror ("malloc-dest");
        return NULL;
    }
    *dest = NULL;   /* set first pointer as sentinel NULL */

    for (;;) {  /* loop continually until end of src reached */
        if (!*ep || strchr (delim, *ep)) {  /* if at nul-char or delimiter char */
            size_t len = ep - p;            /* get length of token */
            if (in && len) {                /* in-word and chars in token */
                if (i == nptrs - 1) {       /* used pointer == allocated - 1? */
                    /* realloc dest to temporary pointer/validate */
                    void *tmp = realloc (dest, 2 * nptrs * sizeof *dest);
                    if (!tmp) {
                        perror ("realloc-dest");
                        break;  /* don't exit, original dest still valid */
                    }
                    dest = tmp;             /* assign reallocated block to dest */
                    nptrs *= 2;             /* increment allocated pointer count */
                }
                /* allocate/validate storage for token */
                if (!(dest[i] = malloc (len + 1))) {
                    perror ("malloc-dest[i]");
                    break;
                }
                memcpy (dest[i], p, len);   /* copy len chars to storage */
                dest[i++][len] = 0;         /* nul-terminate, advance index */
                dest[i] = NULL;             /* set next pointer NULL */
            }
            if (!*ep)                       /* if at end, break */
                break;
            in = 0;                         /* set in-word flag 0 (false) */
        }
        else {  /* normal word char */
            if (!in)                        /* if not in-word */
                p = ep;                     /* update start to end-pointer */
            in = 1;                         /* set in-word flag 1 (true) */
        }
        ep++;   /* advance to next character */
    }

    return dest;
}

int main (void) {

    char *str = "JAN,FEB,MAR,APR,MAY,,,JUN,JUL,AUG,SEP,OCT,NOV,DEC",
        **tokens;                           /* pointer to pointer to char */

    if ((tokens = strsplit (str, ","))) {   /* split string into tokens */
        for (char **p = tokens; *p; p++) {  /* loop over filled pointers */
            puts (*p);
            free (*p);      /* don't forget to free allocated strings */
        }
        free (tokens);      /* and pointers */
    }
}

Example Use/Output

$ ./bin/splitinput
JAN
FEB
MAR
APR
MAY
JUN
JUL
AUG
SEP
OCT
NOV
DEC

Let me know if you have any further questions.


In the above example, there would be a way to return an array of null terminated strings (like you want) in place in the string. It would not make it possible to pass a literal string though, as it would have to be modified by the function:

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

char** str_split( char* str, char delim, int* numSplits )
{
    char** ret;
    int retLen;
    char* c;

    if ( ( str == NULL ) ||
        ( delim == '\0' ) )
    {
        /* Either of those will cause problems */
        ret = NULL;
        retLen = -1;
    }
    else
    {
        retLen = 0;
        c = str;

        /* Pre-calculate number of elements */
        do
        {
            if ( *c == delim )
            {
                retLen++;
            }

            c++;
        } while ( *c != '\0' );

        ret = malloc( ( retLen + 1 ) * sizeof( *ret ) );
        ret[retLen] = NULL;

        c = str;
        retLen = 1;
        ret[0] = str;

        do
        {
            if ( *c == delim )
            {
                ret[retLen++] = &c[1];
                *c = '\0';
            }

            c++;
        } while ( *c != '\0' );
    }

    if ( numSplits != NULL )
    {
        *numSplits = retLen;
    }

    return ret;
}

int main( int argc, char* argv[] )
{
    const char* str = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";

    char* strCpy;
    char** split;
    int num;
    int i;

    strCpy = malloc( strlen( str ) * sizeof( *strCpy ) );
    strcpy( strCpy, str );

    split = str_split( strCpy, ',', &num );

    if ( split == NULL )
    {
        puts( "str_split returned NULL" );
    }
    else
    {
        printf( "%i Results: \n", num );

        for ( i = 0; i < num; i++ )
        {
            puts( split[i] );
        }
    }

    free( split );
    free( strCpy );

    return 0;
}

There is probably a neater way to do it, but you get the idea.


I think strsep is still the best tool for this:

while ((token = strsep(&str, ","))) my_fn(token);

That is literally one line that splits a string.

The extra parentheses are a stylistic element to indicate that we're intentionally testing the result of an assignment, not an equality operator ==.

For that pattern to work, token and str both have type char *. If you started with a string literal, then you'd want to make a copy of it first:

// More general pattern:
const char *my_str_literal = "JAN,FEB,MAR";
char *token, *str, *tofree;

tofree = str = strdup(my_str_literal);  // We own str's memory now.
while ((token = strsep(&str, ","))) my_fn(token);
free(tofree);

If two delimiters appear together in str, you'll get a token value that's the empty string. The value of str is modified in that each delimiter encountered is overwritten with a zero byte - another good reason to copy the string being parsed first.

In a comment, someone suggested that strtok is better than strsep because strtok is more portable. Ubuntu and Mac OS X have strsep; it's safe to guess that other unixy systems do as well. Windows lacks strsep, but it has strbrk which enables this short and sweet strsep replacement:

char *strsep(char **stringp, const char *delim) {
  if (*stringp == NULL) { return NULL; }
  char *token_start = *stringp;
  *stringp = strpbrk(token_start, delim);
  if (*stringp) {
    **stringp = '\0';
    (*stringp)++;
  }
  return token_start;
}

Here is a good explanation of strsep vs strtok. The pros and cons may be judged subjectively; however, I think it's a telling sign that strsep was designed as a replacement for strtok.


My version:

int split(char* str, const char delimeter, char*** args) {
    int cnt = 1;
    char* t = str;

    while (*t == delimeter) t++;

    char* t2 = t;
    while (*(t2++))
        if (*t2 == delimeter && *(t2 + 1) != delimeter && *(t2 + 1) != 0) cnt++;

    (*args) = malloc(sizeof(char*) * cnt);

    for(int i = 0; i < cnt; i++) {
        char* ts = t;
        while (*t != delimeter && *t != 0) t++;

        int len = (t - ts + 1);
        (*args)[i] = malloc(sizeof(char) * len);
        memcpy((*args)[i], ts, sizeof(char) * (len - 1));
        (*args)[i][len - 1] = 0;

        while (*t == delimeter) t++;
    }

    return cnt;
}

Below is my strtok() implementation from zString library. zstring_strtok() differs from standard library's strtok() in the way it treats consecutive delimiters.

Just have a look at the code below,sure that you will get an idea about how it works (I tried to use as many comments as I could)

char *zstring_strtok(char *str, const char *delim) {
    static char *static_str=0;      /* var to store last address */
    int index=0, strlength=0;       /* integers for indexes */
    int found = 0;                  /* check if delim is found */

    /* delimiter cannot be NULL
    * if no more char left, return NULL as well
    */
    if (delim==0 || (str == 0 && static_str == 0))
        return 0;

    if (str == 0)
        str = static_str;

    /* get length of string */
    while(str[strlength])
        strlength++;

    /* find the first occurance of delim */
    for (index=0;index<strlength;index++)
        if (str[index]==delim[0]) {
            found=1;
            break;
        }

    /* if delim is not contained in str, return str */
    if (!found) {
        static_str = 0;
        return str;
    }

    /* check for consecutive delimiters
    *if first char is delim, return delim
    */
    if (str[0]==delim[0]) {
        static_str = (str + 1);
        return (char *)delim;
    }

    /* terminate the string
    * this assignmetn requires char[], so str has to
    * be char[] rather than *char
    */
    str[index] = '\0';

    /* save the rest of the string */
    if ((str + index + 1)!=0)
        static_str = (str + index + 1);
    else
        static_str = 0;

        return str;
}

Below is an example usage...

  Example Usage
      char str[] = "A,B,,,C";
      printf("1 %s\n",zstring_strtok(s,","));
      printf("2 %s\n",zstring_strtok(NULL,","));
      printf("3 %s\n",zstring_strtok(NULL,","));
      printf("4 %s\n",zstring_strtok(NULL,","));
      printf("5 %s\n",zstring_strtok(NULL,","));
      printf("6 %s\n",zstring_strtok(NULL,","));

  Example Output
      1 A
      2 B
      3 ,
      4 ,
      5 C
      6 (null)

The library can be downloaded from Github https://github.com/fnoyanisi/zString


This function takes a char* string and splits it by the deliminator. There can be multiple deliminators in a row. Note that the function modifies the orignal string. You must make a copy of the original string first if you need the original to stay unaltered. This function doesn't use any cstring function calls so it might be a little faster than others. If you don't care about memory allocation, you can allocate sub_strings at the top of the function with size strlen(src_str)/2 and (like the c++ "version" mentioned) skip the bottom half of the function. If you do this, the function is reduced to O(N), but the memory optimized way shown below is O(2N).

The function:

char** str_split(char *src_str, const char deliminator, size_t &num_sub_str){
  //replace deliminator's with zeros and count how many
  //sub strings with length >= 1 exist
  num_sub_str = 0;
  char *src_str_tmp = src_str;
  bool found_delim = true;
  while(*src_str_tmp){
    if(*src_str_tmp == deliminator){
      *src_str_tmp = 0;
      found_delim = true;
    }
    else if(found_delim){ //found first character of a new string
      num_sub_str++;
      found_delim = false;
      //sub_str_vec.push_back(src_str_tmp); //for c++
    }
    src_str_tmp++;
  }
  printf("Start - found %d sub strings\n", num_sub_str);
  if(num_sub_str <= 0){
    printf("str_split() - no substrings were found\n");
    return(0);
  }

  //if you want to use a c++ vector and push onto it, the rest of this function
  //can be omitted (obviously modifying input parameters to take a vector, etc)

  char **sub_strings = (char **)malloc( (sizeof(char*) * num_sub_str) + 1);
  const char *src_str_terminator = src_str_tmp;
  src_str_tmp = src_str;
  bool found_null = true;
  size_t idx = 0;
  while(src_str_tmp < src_str_terminator){
    if(!*src_str_tmp) //found a NULL
      found_null = true;
    else if(found_null){
      sub_strings[idx++] = src_str_tmp;
      //printf("sub_string_%d: [%s]\n", idx-1, sub_strings[idx-1]);
      found_null = false;
    }
    src_str_tmp++;
  }
  sub_strings[num_sub_str] = NULL;

  return(sub_strings);
}

How to use it:

  char months[] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
  char *str = strdup(months);
  size_t num_sub_str;
  char **sub_strings = str_split(str, ',', num_sub_str);
  char *endptr;
  if(sub_strings){
    for(int i = 0; sub_strings[i]; i++)
      printf("[%s]\n", sub_strings[i]);
  }
  free(sub_strings);
  free(str);

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>

/**
 *  splits str on delim and dynamically allocates an array of pointers.
 *
 *  On error -1 is returned, check errno
 *  On success size of array is returned, which may be 0 on an empty string
 *  or 1 if no delim was found.  
 *
 *  You could rewrite this to return the char ** array instead and upon NULL
 *  know it's an allocation problem but I did the triple array here.  Note that
 *  upon the hitting two delim's in a row "foo,,bar" the array would be:
 *  { "foo", NULL, "bar" } 
 * 
 *  You need to define the semantics of a trailing delim Like "foo," is that a
 *  2 count array or an array of one?  I choose the two count with the second entry
 *  set to NULL since it's valueless.
 *  Modifies str so make a copy if this is a problem
 */
int split( char * str, char delim, char ***array, int *length ) {
  char *p;
  char **res;
  int count=0;
  int k=0;

  p = str;
  // Count occurance of delim in string
  while( (p=strchr(p,delim)) != NULL ) {
    *p = 0; // Null terminate the deliminator.
    p++; // Skip past our new null
    count++;
  }

  // allocate dynamic array
  res = calloc( 1, count * sizeof(char *));
  if( !res ) return -1;

  p = str;
  for( k=0; k<count; k++ ){
    if( *p ) res[k] = p;  // Copy start of string
    p = strchr(p, 0 );    // Look for next null
    p++; // Start of next string
  }

  *array = res;
  *length = count;

  return 0;
}

char str[] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,";

int main() {
  char **res;
  int k=0;
  int count =0;
  int rc;

  rc = split( str, ',', &res, &count );
  if( rc ) {
    printf("Error: %s errno: %d \n", strerror(errno), errno);
  }

  printf("count: %d\n", count );
  for( k=0; k<count; k++ ) {
    printf("str: %s\n", res[k]);
  }

  free(res );
  return 0;
}

This is a string splitting function that can handle multi-character delimiters. Note that if the delimiter is longer than the string that is being split, then buffer and stringLengths will be set to (void *) 0, and numStrings will be set to 0.

This algorithm has been tested, and works. (Disclaimer: It has not been tested for non-ASCII strings, and it assumes that the caller gave valid parameters)

void splitString(const char *original, const char *delimiter, char ** * buffer, int * numStrings, int * * stringLengths){
    const int lo = strlen(original);
    const int ld = strlen(delimiter);
    if(ld > lo){
        *buffer = (void *)0;
        *numStrings = 0;
        *stringLengths = (void *)0;
        return;
    }

    *numStrings = 1;

    for(int i = 0;i < (lo - ld);i++){
        if(strncmp(&original[i], delimiter, ld) == 0) {
            i += (ld - 1);
            (*numStrings)++;
        }
    }

    *stringLengths = (int *) malloc(sizeof(int) * *numStrings);

    int currentStringLength = 0;
    int currentStringNumber = 0;
    int delimiterTokenDecrementCounter = 0;
    for(int i = 0;i < lo;i++){
        if(delimiterTokenDecrementCounter > 0){
            delimiterTokenDecrementCounter--;
        } else if(i < (lo - ld)){
            if(strncmp(&original[i], delimiter, ld) == 0){
                (*stringLengths)[currentStringNumber] = currentStringLength;
                currentStringNumber++;
                currentStringLength = 0;
                delimiterTokenDecrementCounter = ld - 1;
            } else {
                currentStringLength++;
            }
        } else {
            currentStringLength++;
        }

        if(i == (lo - 1)){
            (*stringLengths)[currentStringNumber] = currentStringLength;
        }
    }

    *buffer = (char **) malloc(sizeof(char *) * (*numStrings));
    for(int i = 0;i < *numStrings;i++){
        (*buffer)[i] = (char *) malloc(sizeof(char) * ((*stringLengths)[i] + 1));
    }

    currentStringNumber = 0;
    currentStringLength = 0;
    delimiterTokenDecrementCounter = 0;
    for(int i = 0;i < lo;i++){
        if(delimiterTokenDecrementCounter > 0){
            delimiterTokenDecrementCounter--;
        } else if(currentStringLength >= (*stringLengths)[currentStringNumber]){
            (*buffer)[currentStringNumber][currentStringLength] = 0;
            delimiterTokenDecrementCounter = ld - 1;
            currentStringLength = 0;
            currentStringNumber++;
        } else {
            (*buffer)[currentStringNumber][currentStringLength] = (char)original[i];
            currentStringLength++;
        }
    }
    buffer[currentStringNumber][currentStringLength] = 0;
}

Sample code:

int main(){
    const char *string = "STRING-1 DELIM string-2 DELIM sTrInG-3";
    char **buffer;
    int numStrings;
    int * stringLengths;

    splitString(string, " DELIM ", &buffer, &numStrings, &stringLengths);

    for(int i = 0;i < numStrings;i++){
        printf("String: %s\n", buffer[i]);
    }
}

Libraries:

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

My approach is to scan the string and let the pointers point to every character after the deliminators(and the first character), at the same time assign the appearances of deliminator in string to '\0'.
First make a copy of original string(since it's constant), then get the number of splits by scan it pass it to pointer parameter len. After that, point the first result pointer to the copy string pointer, then scan the copy string: once encounter a deliminator, assign it to '\0' thus the previous result string is terminated, and point the next result string pointer to the next character pointer.

char** split(char* a_str, const char a_delim, int* len){
    char* s = (char*)malloc(sizeof(char) * strlen(a_str));
    strcpy(s, a_str);
    char* tmp = a_str;
    int count = 0;
    while (*tmp != '\0'){
        if (*tmp == a_delim) count += 1;
        tmp += 1;
    }
    *len = count;
    char** results = (char**)malloc(count * sizeof(char*));
    results[0] = s;
    int i = 1;
    while (*s!='\0'){
        if (*s == a_delim){
            *s = '\0';
            s += 1;
            results[i++] = s;
        }
        else s += 1;
    }
    return results;
}

Method below will do all the job (memory allocation, counting the length) for you. More information and description can be found here - Implementation of Java String.split() method to split C string

int split (const char *str, char c, char ***arr)
{
    int count = 1;
    int token_len = 1;
    int i = 0;
    char *p;
    char *t;

    p = str;
    while (*p != '\0')
    {
        if (*p == c)
            count++;
        p++;
    }

    *arr = (char**) malloc(sizeof(char*) * count);
    if (*arr == NULL)
        exit(1);

    p = str;
    while (*p != '\0')
    {
        if (*p == c)
        {
            (*arr)[i] = (char*) malloc( sizeof(char) * token_len );
            if ((*arr)[i] == NULL)
                exit(1);

            token_len = 0;
            i++;
        }
        p++;
        token_len++;
    }
    (*arr)[i] = (char*) malloc( sizeof(char) * token_len );
    if ((*arr)[i] == NULL)
        exit(1);

    i = 0;
    p = str;
    t = ((*arr)[i]);
    while (*p != '\0')
    {
        if (*p != c && *p != '\0')
        {
            *t = *p;
            t++;
        }
        else
        {
            *t = '\0';
            i++;
            t = ((*arr)[i]);
        }
        p++;
    }

    return count;
}

How to use it:

int main (int argc, char ** argv)
{
    int i;
    char *s = "Hello, this is a test module for the string splitting.";
    int c = 0;
    char **arr = NULL;

    c = split(s, ' ', &arr);

    printf("found %d tokens.\n", c);

    for (i = 0; i < c; i++)
        printf("string #%d: %s\n", i, arr[i]);

    return 0;
}

Try use this.

char** strsplit(char* str, const char* delim){
    char** res = NULL;
    char*  part;
    int i = 0;

    char* aux = strdup(str);

    part = strdup(strtok(aux, delim));

    while(part){
        res = (char**)realloc(res, (i + 1) * sizeof(char*));
        *(res + i) = strdup(part);

        part = strdup(strtok(NULL, delim));
        i++;
    }

    res = (char**)realloc(res, i * sizeof(char*));
    *(res + i) = NULL;

    return res;
}

Here is my two cents:

int split (const char *txt, char delim, char ***tokens)
{
    int *tklen, *t, count = 1;
    char **arr, *p = (char *) txt;

    while (*p != '\0') if (*p++ == delim) count += 1;
    t = tklen = calloc (count, sizeof (int));
    for (p = (char *) txt; *p != '\0'; p++) *p == delim ? *t++ : (*t)++;
    *tokens = arr = malloc (count * sizeof (char *));
    t = tklen;
    p = *arr++ = calloc (*(t++) + 1, sizeof (char *));
    while (*txt != '\0')
    {
        if (*txt == delim)
        {
            p = *arr++ = calloc (*(t++) + 1, sizeof (char *));
            txt++;
        }
        else *p++ = *txt++;
    }
    free (tklen);
    return count;
}

Usage:

char **tokens;
int count, i;
const char *str = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";

count = split (str, ',', &tokens);
for (i = 0; i < count; i++) printf ("%s\n", tokens[i]);

/* freeing tokens */
for (i = 0; i < count; i++) free (tokens[i]);
free (tokens);

I think the following solution is ideal:

  • Doesn't destroy the source string
  • Re-entrant - i.e., you can safely call it from anywhere in one or more threads
  • Portable
  • Handles multiple separators correctly
  • Fast and efficient

Explanation of the code:

  1. Define a structure token to store the address and lengths of the tokens
  2. Allocate enough memory for these in the worst case, which is when str is made up entirely of separators so there are strlen(str) + 1 tokens, all of them empty strings
  3. Scan str recording the address and length of every token
  4. Use this to allocate the output array of the correct size, including an extra space for a NULL sentinel value
  5. Allocate, copy, and add the tokens using the start and length information - use memcpy as it's faster than strcpy and we know the lengths
  6. Free the token address and length array
  7. Return the array of tokens
typedef struct {
    const char *start;
    size_t len;
} token;

char **split(const char *str, char sep)
{
    char **array;
    unsigned int start = 0, stop, toks = 0, t;
    token *tokens = malloc((strlen(str) + 1) * sizeof(token));
    for (stop = 0; str[stop]; stop++) {
        if (str[stop] == sep) {
            tokens[toks].start = str + start;
            tokens[toks].len = stop - start;
            toks++;
            start = stop + 1;
        }
    }
    /* Mop up the last token */
    tokens[toks].start = str + start;
    tokens[toks].len = stop - start;
    toks++;
    array = malloc((toks + 1) * sizeof(char*));
    for (t = 0; t < toks; t++) {
        /* Calloc makes it nul-terminated */
        char *token = calloc(tokens[t].len + 1, 1);
        memcpy(token, tokens[t].start, tokens[t].len);
        array[t] = token;
    }
    /* Add a sentinel */
    array[t] = NULL; 
    free(tokens);
    return array;
}

Note malloc checking omitted for brevity.

In general, I wouldn't return an array of char * pointers from a split function like this as it places a lot of responsibility on the caller to free them correctly. An interface I prefer is to allow the caller to pass a callback function and call this for every token, as I have described here: Split a String in C.


Examples related to c

conflicting types for 'outchar' Can't compile C program on a Mac after upgrade to Mojave Program to find largest and second largest number in array Prime numbers between 1 to 100 in C Programming Language In c, in bool, true == 1 and false == 0? How I can print to stderr in C? Visual Studio Code includePath "error: assignment to expression with array type error" when I assign a struct field (C) Compiling an application for use in highly radioactive environments How can you print multiple variables inside a string using printf?

Examples related to string

How to split a string in two and store it in a field String method cannot be found in a main class method Kotlin - How to correctly concatenate a String Replacing a character from a certain index Remove quotes from String in Python Detect whether a Python string is a number or a letter How does String substring work in Swift How does String.Index work in Swift swift 3.0 Data to String? How to parse JSON string in Typescript

Examples related to split

Parameter "stratify" from method "train_test_split" (scikit Learn) Pandas split DataFrame by column value How to split large text file in windows? Attribute Error: 'list' object has no attribute 'split' Split function in oracle to comma separated values with automatic sequence How would I get everything before a : in a string Python Split String by delimiter position using oracle SQL JavaScript split String with white space Split a String into an array in Swift? Split pandas dataframe in two if it has more than 10 rows