I'm trying to make a program that splits the string based on a specific character.
Data Structure used:
/* Result of a split: an array of substring pointers plus its element count. */
typedef struct pieces {
/* array of len pointers, one per substring produced by split */
char **members;
/* number of entries in members */
size_t len;
} pieces;
Function declarations:
/* Split s on the character c and return the resulting pieces. */
pieces split (const char *s, const char c);
/* Number of times c appears in s. */
size_t charCount (const char *s, const char c);
/* Copy of the characters of s from index a up to (not including) index b. */
char *slice (const char *s, int a, int b);
/* Index of the first occurrence of c in s, searching from index start. */
size_t indexOf (const char *s, const char c, size_t start);
charCount -> Number of times the character appears in the string.
indexOf -> Returns the index of the first occurrence of the given character in the string, starting the search at index start; e.g. indexOf("Stack Overflow", 'O', 0) == indexOf("Stack Overflow", 'O', 3), since both return 6.
I've implemented slice like this:
/*
 * Copy the characters s[a] .. s[b - 1] into a newly allocated,
 * NUL-terminated string and return it.
 *
 * Returns NULL when a >= b. An end index past the end of s is clamped to
 * strlen(s). The caller owns the returned buffer and must free() it.
 */
char *slice (const char *s, int a, int b)
{
    if (a > b || a == b)
        return NULL;
    if (b > strlen(s)) // Only slice upto end if tried to slice out of index
        b = strlen(s);
    size_t len = b - a + 1; // characters to copy plus one for the terminator
    char *slice = malloc(sizeof(char) * len);
    for (size_t i = a; i < b; i++)
        slice[i - a] = s[i];
    slice[len - 1] = '\0';
    return slice;
}
I'm confused on split function:
/*
 * Split s on the separator character c.
 *
 * Reserves room for one more piece than there are separators, then walks
 * the string, slicing from the current offset up to the next separator.
 */
pieces split (const char *s, const char c)
{
    // Is this the right way to make room for incoming slices ?
    pieces arr;
    arr.len = charCount(s, c) + 1;
    arr.members = malloc(sizeof(char *) * arr.len);
    // Should I do something like this to insert slices ?
    for (size_t i = 0; i < strlen(s);)
    {
        int seperator_idx = indexOf(s, c, i);
        char *piece = slice(s, i, seperator_idx);
        // NOTE(review): i is an offset into s, not a piece number, so this
        // index can exceed arr.len
        arr.members[i] = piece; // Should I use strdup ??
        i = seperator_idx + 1;
    }
    // What about the last slice ?
    return arr;
}
CodePudding user response:
To split a string on 1 character, I would do something like:
#include <string.h>
#include <stdlib.h>
/*
 * Count the words in str, where a word is a maximal run of characters
 * other than delim.
 *
 * Leading, trailing and repeated delimiters never produce empty words, and
 * an empty string contains zero words.
 */
int count_words(char const *str, char const delim)
{
    int count = 0;
    int in_word = 0;

    for (size_t i = 0; str[i]; i++) {
        if (str[i] == delim) {
            in_word = 0;
        } else if (!in_word) {
            // first character of a new word
            in_word = 1;
            count++;
        }
    }
    return count;
}
/*
 * Length of the word starting at str: the number of characters before the
 * first delim or the terminating NUL, whichever comes first.
 */
int word_length(char const *str, char const delim)
{
    int length = 0;

    // while we're on a valid character, increase the word length
    while (str[length] && str[length] != delim)
        length++;
    return length;
}
pieces split(char const *str, char const delim)
{
// move the pointer until we're not on the delimiter
while (*str == delim)
str ;
// prepare the string array
pieces p;
p.len = count_words(str, delim);
p.members = malloc(p.len * sizeof(char *));
// for each string
for (int i = 0; i < p.len; i ) {
// copy the string
int length = word_length(str, delim);
p.members[i] = strndup(str, length);
// move the pointer until we're not on the delimiter
str = length;
while (*str == delim)
str ;
}
return p;
}
CodePudding user response:
There are some issues with the proposed prototypes:
- `pieces split(const char *s, const char c);` — it is unclear if consecutive occurrences of `c` represent empty substrings or a single separator (as in `strtok`). Let's assume empty substrings should be accepted. `const` qualifying `c` is overkill and meaningless in a prototype.
- `size_t charCount(const char *s, const char c);` — same remark about `const char c`. Let's assume the null terminator is not part of the string, so `charCount("abc", '\0')` is zero.
- `char *slice(const char *s, int a, int b);` — why are `a` and `b` typed `int` instead of `size_t`?
- `size_t indexOf(const char *s, const char c, size_t start);` — what should this function return in case `c` is not found in the string starting from index `start`? Let's assume the offset of the end of the string should be returned, as it is more convenient to implement `slice`.
With these conventions, indexOf and charCount can be written as:
#include <stddef.h>
/*
 * Return the index of the first occurrence of c in s at or after start.
 * When c is not found, return the index of the terminating NUL, which is
 * strlen(s). start must not be greater than strlen(s).
 */
size_t indexOf(const char *s, const char c, size_t start) {
    while (s[start] && s[start] != c)
        start++;
    return start;
}
/*
 * Return how many times c occurs in s. The terminating NUL is not part of
 * the string, so charCount(s, '\0') is always 0.
 */
size_t charCount(const char *s, const char c) {
    size_t count = 0;
    for (; *s; s++) {
        // a comparison yields 0 or 1, so it can be added directly
        count += (*s == c);
    }
    return count;
}
Your slice function has multiple problems:
- It should return an empty string if `a == b`.
- It is confusing to name `len` something that is not the length of the substring. Either define `len` as `size_t len = b - a;` or use `size_t size = b - a + 1;`.
- It has undefined behavior if `a` is larger than `strlen(s)` and `b > a`.
- You should gracefully return `NULL` in case of `malloc()` failure.
Here is a modified version:
#include <stdlib.h>
#include <string.h>
/* return an empty string if a >= b */
/*
 * Return a newly allocated copy of the characters s[a] .. s[b - 1].
 *
 * Both indices are clamped to the string: b never exceeds strlen(s) and a
 * never exceeds b, so an empty string is returned if a >= b or if a lies
 * past the end of s. Returns NULL if the allocation fails.
 * The caller owns the result and must free() it.
 */
char *slice(const char *s, size_t a, size_t b) {
    size_t len = strlen(s);
    if (b > len)
        b = len;    /* never read past the terminator */
    if (a > b)
        a = b;      /* reversed or out-of-range start: empty result */
    size_t n = b - a;
    char *slice = malloc(n + 1);
    if (slice != NULL) {
        memcpy(slice, s + a, n);
        slice[n] = '\0';
    }
    return slice;
}
The split function also has problems:
- Naming the `pieces` structure `arr` is confusing: it is not an array.
- Your allocation for `arr.members` is correct, but you should test whether it was allocated successfully.
- There is no need to `strdup()` the return value of `slice`, which was already allocated with `malloc()`.
- You should use 2 separate index variables: one for the index `i` into the array `arr.members` and one for the index of the start of the substring.
- The loop should be written with a test so that `split("", c)` returns a single empty string.
- If `indexOf` returns the end of the string when `c` cannot be found, no special case is needed for the last slice.
Here is a modified version:
/*
 * Split s at every occurrence of c. Consecutive separators produce empty
 * substrings, so the result always has charCount(s, c) + 1 entries.
 *
 * Each entry is allocated by slice(); the caller must free() every entry
 * and then the members array.
 * On allocation failure everything allocated so far is released, members
 * is set to NULL and len to 0.
 */
pieces split(const char *s, const char c) {
    pieces arr;
    arr.len = charCount(s, c) + 1;
    arr.members = malloc(sizeof(*arr.members) * arr.len);
    if (arr.members == NULL) {
        arr.len = 0;
        return arr;
    }
    for (size_t i = 0, start = 0; i < arr.len; i++) {
        size_t end = indexOf(s, c, start);
        arr.members[i] = slice(s, start, end);
        /* skip the separator; the next piece starts right after it */
        start = end + 1;
        if (arr.members[i] == NULL) {
            /* free previous substrings and the members array */
            while (i-- > 0) {
                free(arr.members[i]);
            }
            free(arr.members);
            arr.members = NULL;
            arr.len = 0;
            break;
        }
    }
    return arr;
}
Note these final remarks:
- `split` as coded above works too if `indexOf()` returns `(size_t)(-1)` when the character is not found in the string.
- Recomputing the length of the string in `slice()` is wasteful. `slice()` should assume that the argument values are correct: `0 <= a <= b <= strlen(s)`.
- There is no direct way for `split` to return an error. Setting the `members` to `NULL` seems a workable solution.
- Instead of `slice()`, and assuming `indexOf` returns a valid offset into the string, you could use the POSIX standard function `strndup()`: `arr.members[i] = strndup(s + start, end - start);`
