|
|
|
@ -5,97 +5,132 @@ Copyright 1990 Regents of the University of California. All rights reserved. |
|
|
|
/* |
|
|
|
* String functions |
|
|
|
*/ |
|
|
|
#include <ctype.h> |
|
|
|
#include <stdarg.h> |
|
|
|
|
|
|
|
#include "ngspice/ngspice.h" |
|
|
|
#include "ngspice/stringutil.h" |
|
|
|
#include "ngspice/stringskip.h" |
|
|
|
#include "ngspice/dstring.h" |
|
|
|
|
|
|
|
#include <stdarg.h> |
|
|
|
|
|
|
|
/* Instantiations of string functions in case inlining is not performed */ |
|
|
|
char *copy(const char *str); |
|
|
|
char *copy_substring(const char *str, const char *end); |
|
|
|
int scannum(const char *str); |
|
|
|
int substring(const char *sub, const char *str); |
|
|
|
|
|
|
|
int |
|
|
|
prefix(const char *p, const char *s) |
|
|
|
|
|
|
|
|
|
|
|
static size_t get_kr_msb_factor(size_t n); |
|
|
|
static size_t kr_hash(size_t n, const char *p); |
|
|
|
static inline const char *next_substr( |
|
|
|
size_t n_char_pattern, const char *p_pattern, |
|
|
|
const char **pp_string, const char * const p_last, |
|
|
|
const size_t msb_factor, const size_t h_pattern, size_t *p_h_string); |
|
|
|
static bool can_overlap(size_t n_char_pattern, const char * const p_pattern); |
|
|
|
|
|
|
|
|
|
|
|
/* This function returns true (1) if the null-terminated string s begins
 * with the null-terminated string p and false (0) otherwise. An empty p
 * is a prefix of every string. */
int prefix(const char *p, const char *s)
{
    /* Walk both strings in step until p is exhausted or they differ */
    for ( ; *p != '\0'; ++p, ++s) {
        if (*p != *s) { /* also catches s ending before p */
            return 0;
        }
    }

    return 1; /* all of p was matched */
} /* end of function prefix */
|
|
|
|
|
|
|
|
|
|
|
/* This function returns 1 if string begins with prefix and 0 otherwise.
 * Neither the prefix nor string needs a null termination.
 *
 * Parameters
 * n_char_prefix: Number of characters in prefix
 * prefix: Characters that string must start with
 * n_char_string: Number of characters in string
 * string: Characters being tested
 */
int prefix_n(size_t n_char_prefix, const char *prefix,
        size_t n_char_string, const char *string)
{
    /* A string shorter than the prefix cannot begin with it */
    if (n_char_string < n_char_prefix) {
        return 0;
    }

    /* Lengths are explicit, so compare raw bytes rather than C strings */
    return memcmp(prefix, string, n_char_prefix) == 0;
} /* end of function prefix_n */
|
|
|
|
|
|
|
|
|
|
|
/* copy a substring, from 'str' to 'end' |
|
|
|
* including *str, excluding *end |
|
|
|
/* This function allocates a buffer and copies the specified number of |
|
|
|
* characters from the input string into the buffer followed by a |
|
|
|
* terminating null. |
|
|
|
* |
|
|
|
* Paramters |
|
|
|
* str: String to copy |
|
|
|
* n_char: Number of characters to copy |
|
|
|
* |
|
|
|
* Return values |
|
|
|
* NULL: Allocation failure |
|
|
|
* otherwise: The initialized string. |
|
|
|
*/ |
|
|
|
char * |
|
|
|
copy_substring(const char *str, const char *end) |
|
|
|
char *dup_string(const char *str, size_t n_char) |
|
|
|
{ |
|
|
|
size_t n = (size_t) (end - str); |
|
|
|
char *p; |
|
|
|
|
|
|
|
if ((p = TMALLOC(char, n + 1)) != NULL) { |
|
|
|
(void) strncpy(p, str, n); |
|
|
|
p[n] = '\0'; |
|
|
|
if ((p = TMALLOC(char, n_char + 1)) != NULL) { |
|
|
|
(void) strncpy(p, str, n_char); |
|
|
|
p[n_char] = '\0'; |
|
|
|
} |
|
|
|
return p; |
|
|
|
} |
|
|
|
} /* end of function dup_string */ |
|
|
|
|
|
|
|
|
|
|
|
char * |
|
|
|
tvprintf(const char *fmt, va_list args) |
|
|
|
|
|
|
|
char *tvprintf(const char *fmt, va_list args) |
|
|
|
{ |
|
|
|
char buf[1024]; |
|
|
|
char *p = buf; |
|
|
|
int size = sizeof(buf); |
|
|
|
int nchars; |
|
|
|
|
|
|
|
for (;;) { |
|
|
|
|
|
|
|
int nchars; |
|
|
|
va_list ap; |
|
|
|
|
|
|
|
va_copy(ap, args); |
|
|
|
nchars = vsnprintf(p, (size_t) size, fmt, ap); |
|
|
|
va_end(ap); |
|
|
|
|
|
|
|
if (nchars == -1) { // compatibility to old implementations |
|
|
|
size *= 2; |
|
|
|
} |
|
|
|
else if (nchars >= size) { |
|
|
|
/* Output was truncated. Returned value is the number of chars |
|
|
|
* that would have been written if the buffer were large enough |
|
|
|
* excluding the terminiating null. */ |
|
|
|
size = nchars + 1; /* min required allocation size */ |
|
|
|
/* This case was previously handled by doubling the size of |
|
|
|
* the buffer for "compatibility to old implementations." |
|
|
|
* However, vsnprintf is defined in both C99 and SUSv2 from 1997. |
|
|
|
* There is a slight difference which does not affect this |
|
|
|
* usage, but both return negative values (possibly -1) on an |
|
|
|
* encoding error, which would lead to an infinte loop (until |
|
|
|
* memory was exhausted) with the old behavior */ |
|
|
|
if (nchars < 0) { |
|
|
|
controlled_exit(-1); |
|
|
|
} |
|
|
|
else { /* String formatted OK */ |
|
|
|
|
|
|
|
if (nchars < size) { /* String formatted OK */ |
|
|
|
break; |
|
|
|
} |
|
|
|
|
|
|
|
/* Output was truncated. Returned value is the number of chars |
|
|
|
* that would have been written if the buffer were large enough |
|
|
|
* excluding the terminiating null. */ |
|
|
|
size = nchars + 1; /* min required allocation size */ |
|
|
|
|
|
|
|
/* Allocate a larger buffer */ |
|
|
|
if (p == buf) |
|
|
|
if (p == buf) { |
|
|
|
p = TMALLOC(char, size); |
|
|
|
else |
|
|
|
} |
|
|
|
else { |
|
|
|
p = TREALLOC(char, p, size); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/* Return the formatted string, making a copy on the heap if the |
|
|
|
* stack's buffer (buf) contains the string */ |
|
|
|
return (p == buf) ? copy(p) : p; |
|
|
|
return (p == buf) ? dup_string(p, (size_t) nchars) : p; |
|
|
|
} /* end of function tvprintf */ |
|
|
|
|
|
|
|
|
|
|
|
@ -103,8 +138,7 @@ tvprintf(const char *fmt, va_list args) |
|
|
|
/* This function returns an allocation containing the string formatted |
|
|
|
* according to fmt and the variadic argument list provided. It is a wrapper |
|
|
|
* around tvprintf() which processes the argumens as a va_list. */ |
|
|
|
char * |
|
|
|
tprintf(const char *fmt, ...) |
|
|
|
char *tprintf(const char *fmt, ...) |
|
|
|
{ |
|
|
|
char *rv; |
|
|
|
va_list ap; |
|
|
|
@ -117,103 +151,173 @@ tprintf(const char *fmt, ...) |
|
|
|
} /* end of function tprintf */ |
|
|
|
|
|
|
|
|
|
|
|
/* Determine whether sub is a substring of str. */ |
|
|
|
/* Like strstr( ) XXX */ |
|
|
|
|
|
|
|
int |
|
|
|
substring(const char *sub, const char *str) |
|
|
|
{ |
|
|
|
for (; *str; str++) |
|
|
|
if (*str == *sub) { |
|
|
|
const char *s = sub, *t = str; |
|
|
|
for (; *s; s++, t++) |
|
|
|
if (!*t || (*s != *t)) |
|
|
|
break; |
|
|
|
if (*s == '\0') |
|
|
|
return TRUE; |
|
|
|
} |
|
|
|
|
|
|
|
return FALSE; |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/* Append one character to a null-terminated string and re-terminate it.
 * No check for overflow is made: the caller must ensure that the buffer
 * at s has room for one more character plus the terminating null. */
void appendc(char *s, char c)
{
    char * const p_end = s + strlen(s); /* current terminating null */

    p_end[0] = c;
    p_end[1] = '\0';
} /* end of function appendc */
|
|
|
|
|
|
|
|
|
|
|
/* Returns the unsigned number at *p_str or 0 if there is none. *p_str
 * is advanced to the first character after the number that was read, so
 * it is possible to distinguish between the value 0 and a missing number
 * by testing if the string has been advanced.
 *
 * Remarks
 * Scanning stops at the first non-digit character. The accumulated value
 * is not checked for overflow of int. */
int scannum_adv(char **p_str)
{
    const char *p_cur = *p_str;
    int value = 0;

    /* Accumulate decimal digits */
    for ( ; isdigit_c(*p_cur); ++p_cur) {
        value = 10 * value + (*p_cur - '0');
    }

    *p_str = (char *) p_cur; /* locate end of number */
    return value;
} /* end of function scannum_adv */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* This function returns the integer at the current string location.
 * The string does not need to be null-terminated.
 *
 * Parameters
 * str: String containing the integer to return at the beginning.
 *      An optional leading '-' is accepted.
 * n: Number of characters in the string
 * p_value: Address where the integer is returned. It is written only
 *      when the return value is positive.
 *
 * Return values
 * -1: No integer present
 * -2: Overflow
 * >0: Number of characters in the integer, including any leading '-'
 */
int get_int_n(const char *str, size_t n, int *p_value)
{
    if (n == 0) { /* no string */
        return -1;
    }

    const char *p_cur = str;
    const char * const p_end = str + n;

    /* Check for leading negative sign */
    const bool f_neg = (*p_cur == '-');
    if (f_neg) {
        ++p_cur;
    }
    const char * const p_digits = p_cur; /* where the digits must start */

    /* Largest magnitude that can be stored. If negative, it can be
     * 1 greater (-2**n vs 2**n - 1) */
    const unsigned int limit = (unsigned int) INT_MAX + (f_neg ? 1u : 0u);
    unsigned int value = 0;

    /* Iterate over chars until end or char that is not numeric */
    for ( ; p_cur != p_end; ++p_cur) {
        /* <ctype.h> functions require a value representable as
         * unsigned char */
        const unsigned char ch_cur = (unsigned char) *p_cur;
        if (!isdigit(ch_cur)) { /* Test for exit due to non-numeric char */
            break;
        }

        /* Check for overflow before computing the new value:
         * 10 * value + digit > limit  <=>  value > (limit - digit) / 10 */
        const unsigned int digit = (unsigned int) (ch_cur - '0');
        if (value > (limit - digit) / 10) {
            return -2;
        }
        value = 10 * value + digit;
    } /* end of loop over digits */

    /* Test for at least one digit */
    if (p_cur == p_digits) {
        return -1; /* no digit */
    }

    /* Apply the sign. The magnitude INT_MAX + 1 is only reachable when
     * negative and cannot be negated as an int, so it is mapped to
     * INT_MIN explicitly. */
    if (!f_neg) {
        *p_value = (int) value;
    }
    else if (value > (unsigned int) INT_MAX) {
        *p_value = INT_MIN;
    }
    else {
        *p_value = -(int) value;
    }

    return (int) (p_cur - str); /* number of chars in the number */
} /* end of function get_int_n */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Case insensitive str eq. */ |
|
|
|
/* Like strcasecmp( ) XXX */ |
|
|
|
|
|
|
|
int |
|
|
|
cieq(const char *p, const char *s) |
|
|
|
int cieq(const char *p, const char *s) |
|
|
|
{ |
|
|
|
for (; *p; p++, s++) |
|
|
|
if (tolower_c(*p) != tolower_c(*s)) |
|
|
|
for (; *p; p++, s++) { |
|
|
|
if (tolower_c(*p) != tolower_c(*s)) { |
|
|
|
return FALSE; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return *s == '\0'; |
|
|
|
} |
|
|
|
} /* end of function cieq */ |
|
|
|
|
|
|
|
|
|
|
|
/* Case insensitive prefix. */ |
|
|
|
|
|
|
|
int |
|
|
|
ciprefix(const char *p, const char *s) |
|
|
|
/* Case-insensitive string compare fore equialty with explicit length |
|
|
|
* given. Neither character array needs to be null terminated. By not |
|
|
|
* including the trailing null in the count, it can be used to check |
|
|
|
* for a prefix. This function is useful for avoiding string copies |
|
|
|
* to temporary buffers and the potential for buffer overruns that |
|
|
|
* can occur when using temporary buffers without checking lengths. */ |
|
|
|
int cieqn(const char *p, const char *s, size_t n) |
|
|
|
{ |
|
|
|
size_t i; |
|
|
|
for (i = 0; i < n; ++i) { |
|
|
|
if (tolower_c(p[i]) != tolower_c(s[i])) { |
|
|
|
return FALSE; |
|
|
|
} |
|
|
|
} |
|
|
|
return TRUE; /* all chars matched */ |
|
|
|
} /* end of function cineq */ |
|
|
|
|
|
|
|
|
|
|
|
/* Case insensitive prefix. */ |
|
|
|
int ciprefix(const char *p, const char *s) |
|
|
|
{ |
|
|
|
for (; *p; p++, s++) |
|
|
|
if (tolower_c(*p) != tolower_c(*s)) |
|
|
|
if (tolower_c(*p) != tolower_c(*s)) { |
|
|
|
return FALSE; |
|
|
|
} |
|
|
|
|
|
|
|
return TRUE; |
|
|
|
} |
|
|
|
} /* end of function ciprefix */ |
|
|
|
|
|
|
|
|
|
|
|
/* Convert a null-terminated string to lower case in place using
 * tolower_c(). A NULL str is accepted and ignored. */
void strtolower(char *str)
{
    char *p_cur;

    if (str == NULL) {
        return;
    }

    for (p_cur = str; *p_cur != '\0'; ++p_cur) {
        *p_cur = tolower_c(*p_cur);
    }
} /* end of function strtolower */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Convert a null-terminated string to upper case in place using
 * toupper_c(). A NULL str is accepted and ignored. */
void strtoupper(char *str)
{
    char *p_cur;

    if (str == NULL) {
        return;
    }

    for (p_cur = str; *p_cur != '\0'; ++p_cur) {
        *p_cur = toupper_c(*p_cur);
    }
} /* end of function strtoupper */
|
|
|
|
|
|
|
|
|
|
|
#ifdef CIDER |
|
|
|
@ -230,18 +334,21 @@ strtoupper(char *str) |
|
|
|
* first n characters are the same |
|
|
|
*/ |
|
|
|
|
|
|
|
/* Case-insensitive prefix test with a minimum length. Returns 1 if every
 * character of the null-terminated string p matches the corresponding
 * character of s ignoring case AND p is at least n characters long.
 * Returns 0 otherwise, including when either pointer is NULL. */
int cinprefix(char *p, char *s, int n)
{
    if (p == NULL || s == NULL) {
        return 0;
    }

    /* n counts down the chars of p still needed to reach the minimum */
    while (*p != '\0') {
        if (tolower_c(*p) != tolower_c(*s)) {
            return 0;
        }
        ++p;
        ++s;
        --n;
    }

    return n <= 0; /* true if at least n chars were matched */
} /* end of function cinprefix */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* |
|
|
|
@ -410,6 +517,41 @@ gettok_iv(char **s) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* findtok_noparen() does the string scanning for gettok_noparens() but
 * does not allocate a token. Hence it is useful when a copy of the token
 * is not required.
 *
 * Parameters
 * p_str: Address of the scan position. On return it points past the
 *      token and any whitespace that follows it.
 * p_token: Address to receive the start of the token, or NULL if only
 *      whitespace remained in the string.
 * p_token_end: Address to receive the first character after the token.
 *      It is not written when *p_token is set to NULL.
 *
 * A token ends at whitespace, '(', ')', ',' or the end of the string.
 */
void findtok_noparen(char **p_str, char **p_token, char **p_token_end)
{
    char *p_cur = skip_ws(*p_str);

    if (*p_cur == '\0') { /* nothing left but the end of the string */
        *p_str = p_cur;
        *p_token = NULL;
        return;
    }

    *p_token = p_cur; /* Token starts after whitespace */

    /* Advance to the first delimiter or the end of the string */
    for ( ; *p_cur != '\0'; ++p_cur) {
        const char c = *p_cur;
        if (isspace_c(c) || c == '(' || c == ')' || c == ',') {
            break;
        }
    }
    *p_token_end = p_cur;

    *p_str = skip_ws(p_cur);
} /* end of function findtok_noparen */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*-------------------------------------------------------------------------* |
|
|
|
* gettok_noparens was added by SDB on 4.21.2003. |
|
|
|
* It acts like gettok, except that it treats parens and commas like |
|
|
|
@ -417,33 +559,17 @@ gettok_iv(char **s) |
|
|
|
* parsing and returns when it finds one of those chars. It is called from |
|
|
|
* 'translate' (subckt.c). |
|
|
|
*-------------------------------------------------------------------------*/ |
|
|
|
|
|
|
|
/* Return a newly allocated copy of the next token at *s, where a token
 * ends at whitespace, '(', ')', ',' or the end of the string. *s is
 * advanced past the token and any whitespace following it. NULL is
 * returned if only whitespace remained. The scanning itself is done by
 * findtok_noparen(). */
char *gettok_noparens(char **s)
{
    char *token;
    char *token_e;

    findtok_noparen(s, &token, &token_e);
    if (token == NULL) {
        return NULL; /* return NULL if we come to end of line */
    }

    return copy_substring(token, token_e);
} /* end of function gettok_noparens */
|
|
|
|
|
|
|
|
|
|
|
/*-------------------------------------------------------------------------* |
|
|
|
* gettok_model acts like gettok_noparens, however when it encounters a '{', |
|
|
|
@ -746,8 +872,8 @@ str_has_arith_char(char *s) |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
int |
|
|
|
get_comma_separated_values(char *values[], char *str) { |
|
|
|
int get_comma_separated_values(char *values[], char *str) |
|
|
|
{ |
|
|
|
int count = 0; |
|
|
|
char *comma_ptr; |
|
|
|
|
|
|
|
@ -769,9 +895,7 @@ get_comma_separated_values(char *values[], char *str) { |
|
|
|
modulo a trailing model binning extension '\.[0-9]+' |
|
|
|
then return 2 |
|
|
|
*/ |
|
|
|
|
|
|
|
int |
|
|
|
model_name_match(const char *token, const char *model_name) |
|
|
|
int model_name_match(const char *token, const char *model_name) |
|
|
|
{ |
|
|
|
const char *p; |
|
|
|
size_t token_len = strlen(token); |
|
|
|
@ -799,7 +923,443 @@ model_name_match(const char *token, const char *model_name) |
|
|
|
return 0; |
|
|
|
|
|
|
|
return 2; |
|
|
|
} |
|
|
|
} /* end of funtion model_name_match */ |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* This function returns 1 if pattern is a substring anywhere in str and
 * 0 otherwise. A null (zero-length) pattern is considered a mismatch.
 * Neither the pattern nor the string needs a null termination.
 *
 * Uses Karp-Rabin substring matching with base=256 and modulus=1009
 */
int substring_n(size_t n_char_pattern, const char *p_pattern,
        size_t n_char_string, const char *p_string)
{
    /* Test for a pattern to match and a string of sufficient length */
    if (n_char_pattern == 0 || n_char_pattern > n_char_string) {
        return 0;
    }

    const size_t h_pattern = kr_hash(n_char_pattern, p_pattern);
    size_t h_string = kr_hash(n_char_pattern, p_string);

    /* Compare at beginning. If hashes match, do full compare */
    if (h_pattern == h_string &&
            memcmp(p_pattern, p_string, n_char_pattern) == 0) {
        return 1; /* match at start */
    }

    /* Number of starting points after the first. If there is none,
     * next_substr() must not be called since it always steps to the
     * next starting point before comparing. */
    const size_t n_more = n_char_string - n_char_pattern;
    if (n_more == 0) {
        return 0;
    }

    /* Factor for rolling hash computation */
    const size_t msb_factor = get_kr_msb_factor(n_char_pattern);

    /* Last starting point at which the pattern still fits in the string */
    const char * const p_last = p_string + n_more;

    /* Compare at each remaining starting point in the string */
    return next_substr(n_char_pattern, p_pattern, &p_string, p_last,
            msb_factor, h_pattern, &h_string) != NULL;
} /* end of function substring_n */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* This function initializes a scan for substring matches */ |
|
|
|
void substring_match_init(size_t n_char_pattern, const char *p_pattern, |
|
|
|
size_t n_char_string, const char *p_string, bool f_overlap, |
|
|
|
struct substring_match_info *p_scan_state) |
|
|
|
{ |
|
|
|
/* Save input info into structure. Note that the strings are not |
|
|
|
* copied, so they must remain allocated and unaltered while the |
|
|
|
* search is in progress. */ |
|
|
|
p_scan_state->n_char_pattern = n_char_pattern; |
|
|
|
p_scan_state->p_pattern = p_pattern; |
|
|
|
p_scan_state->n_char_string = n_char_string; |
|
|
|
p_scan_state->p_string = p_string; |
|
|
|
|
|
|
|
/*** Calculate intermediate data ***/ |
|
|
|
|
|
|
|
/* Test for a pattern to match */ |
|
|
|
if (n_char_pattern == 0) { |
|
|
|
p_scan_state->f_done = TRUE; |
|
|
|
} |
|
|
|
/* Test for a string of sufficient length */ |
|
|
|
else if (n_char_pattern > n_char_string) { |
|
|
|
p_scan_state->f_done = TRUE; |
|
|
|
} |
|
|
|
else { |
|
|
|
p_scan_state->f_done = FALSE; |
|
|
|
|
|
|
|
/* Look for overlaps only if possible */ |
|
|
|
p_scan_state->f_overlap= f_overlap ? |
|
|
|
!can_overlap(n_char_pattern, p_pattern) : FALSE; |
|
|
|
p_scan_state->n_char_pattern_1 = n_char_pattern - 1; |
|
|
|
p_scan_state->msb_factor = get_kr_msb_factor(n_char_pattern); |
|
|
|
p_scan_state->h_pattern = kr_hash(n_char_pattern, p_pattern); |
|
|
|
p_scan_state->h_string = kr_hash(n_char_pattern, p_string); |
|
|
|
p_scan_state->p_last = |
|
|
|
p_string + (n_char_string - n_char_pattern - 1); |
|
|
|
} |
|
|
|
|
|
|
|
return; |
|
|
|
} /* end of function substring_match_init */ |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* This function finds the next substring match |
|
|
|
* |
|
|
|
* Parameter |
|
|
|
* p_scan_state: Address of struct substring_match_info initialized by |
|
|
|
* substring_match_init() |
|
|
|
* |
|
|
|
* Return value |
|
|
|
* NULL if there is no match or the address of the next match otherwise |
|
|
|
*/ |
|
|
|
char *substring_match_next(struct substring_match_info *p_scan_state) |
|
|
|
{ |
|
|
|
/* First test if there are no more possible matches */ |
|
|
|
if (p_scan_state->f_done) { |
|
|
|
return (char *) NULL; |
|
|
|
} |
|
|
|
|
|
|
|
/* Find next match, if any */ |
|
|
|
const char * const p_match = next_substr( |
|
|
|
p_scan_state->n_char_pattern, p_scan_state->p_pattern, |
|
|
|
&p_scan_state->p_string, p_scan_state->p_last, |
|
|
|
p_scan_state->msb_factor,p_scan_state->h_pattern, |
|
|
|
&p_scan_state->h_string); |
|
|
|
|
|
|
|
/* Update done status if changed */ |
|
|
|
if (p_match == (char *) NULL) { |
|
|
|
p_scan_state->f_done = TRUE; |
|
|
|
} |
|
|
|
else { |
|
|
|
if (!p_scan_state->f_overlap) { |
|
|
|
p_scan_state->p_string += |
|
|
|
p_scan_state->n_char_pattern_1; /* end of match */ |
|
|
|
p_scan_state->h_string = p_scan_state->h_pattern; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return (char *) p_match; /* Return result */ |
|
|
|
} /* end of function substring_match_next */ |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef COMPILE_UNUSED_FUNCTIONS |
|
|
|
/* This function returns the locations of optionally non-overlapping
 * substring matches. For example, in the string aaaaa, aa is found in
 * non-overlapping locations at 0-based offsets 0 and 2 and with
 * overlapping allowed at offsets 0, 1, 2, and 3.
 *
 * Parameters
 * n_char_pattern: Length of pattern to find
 * p_pattern: Pattern to find. Need not be null-terminated
 * n_char_string: Length of string to scan
 * p_string: String to scan. Need not be null-terminated
 * n_elem_buf: Number of match addresses that the buffer can hold
 * p_match_buf: Buffer receiving the addresses of the matches.
 *      NOTE(review): the matches are stored as char * values, so this
 *      is treated as the address of an array of n_elem_buf char *
 *      elements -- confirm against the intended callers; the parameter
 *      would more naturally be declared char **.
 * f_overlap: If true, overlapping matches are returned
 *
 * Return value
 * Number of matches stored in the buffer
 */
size_t get_substring_matches(size_t n_char_pattern, const char *p_pattern,
        size_t n_char_string, const char *p_string,
        size_t n_elem_buf, char *p_match_buf, bool f_overlap)
{
    /* Test for a pattern to match, a string of sufficient length and
     * a buffer that can hold at least one result */
    if (n_char_pattern == 0 || n_char_pattern > n_char_string ||
            n_elem_buf == 0) {
        return 0;
    }

    char ** const pp_match_buf = (char **) p_match_buf;
    size_t n_match = 0;

    /* Factor for rolling hash computation */
    const size_t msb_factor = get_kr_msb_factor(n_char_pattern);

    const size_t h_pattern = kr_hash(n_char_pattern, p_pattern);
    size_t h_string = kr_hash(n_char_pattern, p_string);

    /* Last starting point at which the pattern still fits in the string */
    const char * const p_last = p_string + (n_char_string - n_char_pattern);

    /* Number of chars to step over after a match. Overlaps are looked
     * for only if requested and possible for this pattern. */
    const size_t n_step =
            (f_overlap && can_overlap(n_char_pattern, p_pattern)) ?
            0 : n_char_pattern - 1;

    /* Compare at beginning. If hashes match, do full compare. Otherwise
     * scan forward if there is any other starting point. */
    const char *p_match = NULL;
    if (h_pattern == h_string &&
            memcmp(p_pattern, p_string, n_char_pattern) == 0) {
        p_match = p_string;
    }
    else if (p_string != p_last) {
        p_match = next_substr(n_char_pattern, p_pattern,
                &p_string, p_last, msb_factor, h_pattern, &h_string);
    }

    while (p_match != NULL) {
        pp_match_buf[n_match++] = (char *) p_match; /* Save result */

        if (n_match == n_elem_buf) { /* full buffer */
            break;
        }

        /* Done if no starting point remains after stepping */
        if ((size_t) (p_last - p_match) <= n_step) {
            break;
        }

        /* If overlapping is not allowed, continue the search after the
         * match. The rolling hash describes the window at p_string, so
         * it must be recomputed for the new location. */
        if (n_step > 0) {
            p_string = p_match + n_step;
            h_string = kr_hash(n_char_pattern, p_string);
        }

        p_match = next_substr(n_char_pattern, p_pattern,
                &p_string, p_last, msb_factor, h_pattern, &h_string);
    } /* end of loop over matches */

    return n_match;
} /* end of function get_substring_matches */
|
|
|
#endif /* COMPILE_UNUSED_FUNCTIONS */ |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* This function determines if a pattern can allow overlapping matches.
 * For example, the pattern "starts" would have overlapped matches in the
 * string "startstarts".
 *
 * A pattern can overlap itself exactly when some proper suffix of the
 * pattern is also a prefix of it, so every possible shift of the pattern
 * against itself is tested. Testing only the last occurrence of the first
 * character is not sufficient: "aabZaab" overlaps itself by "aab", but
 * the suffix at the last 'a' is "ab", which is not a prefix.
 *
 * Remarks
 * While not directly related to this function, there is only a binary yes/no
 * interest regarding overlap rather than an offset into the string where
 * such overlap may occur. That is because the hash value is being computed
 * incrementally, so the only time when there is substantial computational
 * savings in this approach is when the hash value is known, as it would be
 * at the end of a match (since the hash of the pattern is known.)
 */
static bool can_overlap(size_t n_char_pattern, const char * const p_pattern)
{
    /* For fewer than 2 chars the answer does not matter, and true is
     * returned */
    bool f_can_overlap = n_char_pattern < 2;
    size_t n_shift;

    /* Compare the pattern against itself shifted by n_shift chars. The
     * overlapped region has n_char_pattern - n_shift chars. */
    for (n_shift = 1; !f_can_overlap && n_shift < n_char_pattern;
            ++n_shift) {
        f_can_overlap = memcmp(p_pattern, p_pattern + n_shift,
                n_char_pattern - n_shift) == 0;
    }

    return f_can_overlap;
} /* end of function can_overlap */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Prime number for Karp-Rabin hashing. Tradeoff between number of hash
 * collisions and number of times modulus must be taken. */
#define KR_MODULUS 1009

/* Compute (256^(n-1)) % KR_MODULUS, the weight of the most significant
 * (first) character in the hash of an n-character window. It is used to
 * remove that character when the window is advanced. For n < 2 the
 * result is 1. */
static size_t get_kr_msb_factor(size_t n)
{
    size_t factor = 1;
    size_t i;

    /* Multiply by the base n - 1 times. Taking the modulus at each step
     * keeps the value below KR_MODULUS so that it cannot overflow. */
    for (i = 1; i < n; ++i) {
        factor = (factor << 8) % KR_MODULUS;
    }

    return factor;
} /* end of function get_kr_msb_factor */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Compute KR hash assuming n >= 1 */ |
|
|
|
static size_t kr_hash(size_t n, const char *p) |
|
|
|
{ |
|
|
|
const char * const p_end = p + n; |
|
|
|
size_t hash = *(unsigned char *) p; |
|
|
|
for (p++; p < p_end; p++) { |
|
|
|
unsigned char ch = *(unsigned char *) p; |
|
|
|
size_t hash_new = (hash << 8) + ch; |
|
|
|
if (hash_new < hash) { /* overflow */ |
|
|
|
hash %= KR_MODULUS; /* take modulus */ |
|
|
|
hash = (hash << 8) + ch; /* and recompute */ |
|
|
|
} |
|
|
|
else { /* no overflow, so no need for modulus yet */ |
|
|
|
hash = hash_new; |
|
|
|
} |
|
|
|
} /* end of loop hasing chars */ |
|
|
|
|
|
|
|
/* Do final modulus if necessary */ |
|
|
|
if (hash >= KR_MODULUS) { |
|
|
|
hash %= KR_MODULUS; |
|
|
|
} |
|
|
|
|
|
|
|
return hash; |
|
|
|
} /* end of function kr_hash */ |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* This function locates the next substring match. It is intended to be called |
|
|
|
* as part of the scanning of a string for a substring |
|
|
|
* |
|
|
|
* Parameters |
|
|
|
* n_char_pattern: Length of pattern to find |
|
|
|
* p_pattern: Pattern to find. Need not be null-terminated |
|
|
|
* pp_string: Address containing the current location in the string. Updated |
|
|
|
* if a match is found. |
|
|
|
* p_last: Address of last possible location of a match |
|
|
|
* msb_factor: Constant related to hash update |
|
|
|
* h_pattern: Computed hash of pattern |
|
|
|
* p_h_string: Address containing the current hash value of the location |
|
|
|
* in the string being considered. It is updated in the function. |
|
|
|
* |
|
|
|
* Return value |
|
|
|
* NULL if no substring, or the address of the substring if one exists. |
|
|
|
*/ |
|
|
|
static inline const char *next_substr( |
|
|
|
size_t n_char_pattern, const char *p_pattern, |
|
|
|
const char **pp_string, const char * const p_last, |
|
|
|
const size_t msb_factor, const size_t h_pattern, size_t *p_h_string) |
|
|
|
{ |
|
|
|
const char *p_string = *pp_string; |
|
|
|
size_t h_string = *p_h_string; |
|
|
|
|
|
|
|
for ( ; ; ) { |
|
|
|
/* Update hash for next starting point at p_string + 1 */ |
|
|
|
if ((h_string = (((h_string - (unsigned char) p_string[0] * |
|
|
|
msb_factor) << 8) + p_string[n_char_pattern]) % |
|
|
|
KR_MODULUS) > KR_MODULUS) { /* negative value when signed */ |
|
|
|
h_string += KR_MODULUS; |
|
|
|
} |
|
|
|
++p_string; /* step to next starting point */ |
|
|
|
|
|
|
|
/* Compare at current starting point. If hashes match, |
|
|
|
* do full compare */ |
|
|
|
if (h_pattern == h_string && |
|
|
|
memcmp(p_pattern, p_string, n_char_pattern) == 0) { |
|
|
|
*pp_string = p_string; /* Update string location */ |
|
|
|
*p_h_string = h_string; /* and hash for another call */ |
|
|
|
return p_string; /* match here */ |
|
|
|
} |
|
|
|
|
|
|
|
/* Exit with no match if at last starting point */ |
|
|
|
if (p_string == p_last) { |
|
|
|
return (char *) NULL; /* no match found */ |
|
|
|
} |
|
|
|
} /* end of loop over starting points in string */ |
|
|
|
} /* end of function next_substr */ |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* This function returns true if '\0' is among the n characters at p and
 * false otherwise, including when n is 0. */
static inline bool have_null(size_t n, const char *p)
{
    /* Scan backwards to make the common case of using a null termination
     * of a string for the null char be faster. An index is counted down
     * instead of a pointer so that no address before p is ever formed. */
    size_t n_left = n;
    while (n_left > 0 && p[n_left - 1] != '\0') {
        --n_left;
    }

    return n_left > 0; /* stopped early only if '\0' was found */
} /* end of function have_null */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* This function "finds a needle in a haystack" aka the first occurrence of
 * any character of needle in haystack. NULL is returned if none is found
 * or if n_needle is 0. haystack must be terminated with '\0'.
 *
 * Parameters
 * haystack: Null-terminated string to search
 * n_needle: Number of characters at p_needle
 * p_needle: Characters to locate
 *
 * Remarks
 * p_needle does not need to be null terminated. In fact, a null can be
 * included among the characters to be located so that this function will
 * locate the end of haystack if none of the other characters is found and
 * would guarantee that the returned value is not NULL.
 *
 * The case of a '\0' included among the chars to locate is treated as a
 * special case for improved efficiency.
 *
 * For a sufficiently large haystack, further gains in performance can be
 * achieved by analyzing the characteristics of the needle values and
 * developing comparisons based on bit values or range values. As a
 * trivial example, for the needle string "01234567", instead of 8
 * comparisons for the 8 values, 2 comparisons can be used by comparing
 * against >= '0' and against <= '7'. Without a large enough haystack, the
 * computational time required for the analysis would not be recovered.
 */
char *find_first_of(const char *haystack,
        unsigned int n_needle, const char *p_needle)
{
    unsigned int i;

    /* Handle case of nothing to find */
    if (n_needle == 0) {
        return NULL;
    }

    if (have_null(n_needle, p_needle)) { /* searching for '\0' */
        /* The terminating null of haystack is itself a needle, so the
         * scan always ends with a hit and needs no separate end test */
        for ( ; ; ++haystack) { /* iterate over straws in haystack */
            for (i = 0; i < n_needle; ++i) {
                if (*haystack == p_needle[i]) { /* found needle */
                    return (char *) haystack;
                }
            } /* end of loop over needles */
        } /* end of loop over straws in haystack */
    } /* end of case that '\0' among items being located */

    /* Else '\0' is not among the items being located */
    for ( ; ; ++haystack) { /* iterate over straws in haystack */
        for (i = 0; i < n_needle; ++i) {
            if (*haystack == p_needle[i]) { /* found needle */
                return (char *) haystack;
            }
        } /* end of loop over needles */

        if (*haystack == '\0') { /* entire haystack searched */
            return NULL;
        }
    } /* end of loop over straws in haystack */
} /* end of function find_first_of */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* This function returns true if any of the n characters at str is one of
 * the characters '"', '\'' or '\\' and false otherwise. The string does
 * not need to be null-terminated. */
bool has_escape_or_quote(size_t n, const char *str)
{
    const char * const str_end = str + n;
    const char *p_cur = str;

    /* Advance to the first quote or escape char, or to the end */
    while (p_cur != str_end &&
            *p_cur != '"' && *p_cur != '\'' && *p_cur != '\\') {
        ++p_cur;
    }

    return p_cur != str_end; /* stopped early only if one was found */
} /* end of function has_escape_or_quote */
|
|
|
|