Fwd: mpx_set_str2(mpx_t, const char*, const char*, int) suggestion

Pierre Chatelier pierre at chachatelier.fr
Tue Feb 2 19:00:46 UTC 2016


Hello,

This is a "bump" of a previous message that received no answer.
I could not find any guidelines on gmplib.org about how to submit a patch, so I do not know how else to submit it.
Here is a patch for the following feature:

Currently the various mpx structures (mpz_t, mpq_t, mpf_t) have an input function similar to:
mpx_set_str(mpx_t r, const char *sp, int base)

For the sake of performance, I suggest a new prototype mpx_set_str2(mpx_t r, const char *start, const char *end, int base), where the half-open range [start, end) is the string to consider (that is, the character at end is not part of the string).
For me, the benefits are:
	- It saves calls to strlen() in the current implementation.
	- Users of gmplib (like me) do not need to create intermediate strings with a '\0' terminator when parsing substrings from complex input data.

Regards,

Pierre Chatelier

# HG changeset patch
# User Pierre Chatelier <pierre at chachatelier.fr>
# Date 1449003901 -3600
#      Tue Dec 01 22:05:01 2015 +0100
# Node ID 89fd519f1bac6846bfa54319f416c6c75716ea71
# Parent  bb072477386f1a513937aec31cd6fa3e925f59f6
added mpz_set_str2
added mpq_set_str2
added mpf_set_str2

diff -r bb072477386f -r 89fd519f1bac gmp-h.in
--- a/gmp-h.in	Tue Nov 24 15:31:26 2015 +0100
+++ b/gmp-h.in	Tue Dec 01 22:05:01 2015 +0100
@@ -1032,6 +1032,9 @@
#define mpz_set_str __gmpz_set_str
__GMP_DECLSPEC int mpz_set_str (mpz_ptr, const char *, int);

+#define mpz_set_str2 __gmpz_set_str2
+__GMP_DECLSPEC int mpz_set_str2 (mpz_ptr, const char *, const char *, int);
+
#define mpz_set_ui __gmpz_set_ui
__GMP_DECLSPEC void mpz_set_ui (mpz_ptr, unsigned long int);

@@ -1233,6 +1236,9 @@
#define mpq_set_str __gmpq_set_str
__GMP_DECLSPEC int mpq_set_str (mpq_ptr, const char *, int);

+#define mpq_set_str2 __gmpq_set_str2
+__GMP_DECLSPEC int mpq_set_str2 (mpq_ptr, const char *, const char *, int);
+
#define mpq_set_ui __gmpq_set_ui
__GMP_DECLSPEC void mpq_set_ui (mpq_ptr, unsigned long int, unsigned long int);

@@ -1419,6 +1425,9 @@
#define mpf_set_str __gmpf_set_str
__GMP_DECLSPEC int mpf_set_str (mpf_ptr, const char *, int);

+#define mpf_set_str2 __gmpf_set_str2
+__GMP_DECLSPEC int mpf_set_str2 (mpf_ptr, const char *, const char *, int);
+
#define mpf_set_ui __gmpf_set_ui
__GMP_DECLSPEC void mpf_set_ui (mpf_ptr, unsigned long int);

diff -r bb072477386f -r 89fd519f1bac mpf/set_str.c
--- a/mpf/set_str.c	Tue Nov 24 15:31:26 2015 +0100
+++ b/mpf/set_str.c	Tue Dec 01 22:05:01 2015 +0100
@@ -122,6 +122,12 @@
int
mpf_set_str (mpf_ptr x, const char *str, int base)
{
+  return mpf_set_str2(x, str, str+strlen(str), base);
+}
+
+int
+mpf_set_str2 (mpf_ptr x, const char *start, const char *end, int base)
+{
  size_t str_size;
  char *s, *begs;
  size_t i, j;
@@ -133,16 +139,21 @@
  const char  *point = GMP_DECIMAL_POINT;
  size_t      pointlen = strlen (point);
  const unsigned char *digit_value;
+  const char* str = start;
  TMP_DECL;
+  
+  if (!start || (end <= start))
+    return -1;

-  c = (unsigned char) *str;
+  if (str<end)
+    c = (unsigned char) *str;

  /* Skip whitespace.  */
-  while (isspace (c))
+  while ((str<end) && isspace (c))
    c = (unsigned char) *++str;

  negative = 0;
-  if (c == '-')
+  if ((c == '-') && (str<end))
    {
      negative = 1;
      c = (unsigned char) *++str;
@@ -175,16 +186,16 @@
    {
      /* not a digit, must be a decimal point */
      for (i = 0; i < pointlen; i++)
-	if (str[i] != point[i])
+	if ((str+i >= end) || (str[i] != point[i]))
	  return -1;
-      if (digit_value[(unsigned char) str[pointlen]] >= base)
+      if ((str+pointlen >= end) || (digit_value[(unsigned char) str[pointlen]] >= base))
	return -1;
    }

  /* Locate exponent part of the input.  Look from the right of the string,
     since the exponent is usually a lot shorter than the mantissa.  */
  expptr = NULL;
-  str_size = strlen (str);
+  str_size = end-str;
  for (i = str_size - 1; i > 0; i--)
    {
      c = (unsigned char) str[i];
diff -r bb072477386f -r 89fd519f1bac mpq/set_str.c
--- a/mpq/set_str.c	Tue Nov 24 15:31:26 2015 +0100
+++ b/mpq/set_str.c	Tue Dec 01 22:05:01 2015 +0100
@@ -41,29 +41,32 @@
int
mpq_set_str (mpq_ptr q, const char *str, int base)
{
+  return mpq_set_str2(q, str, str+strlen(str), base);
+}
+
+int
+mpq_set_str2 (mpq_ptr q, const char *start, const char *end, int base)
+{
  const char  *slash;
-  char        *num;
-  size_t      numlen;
  int         ret;
+  const char* str = start;
+  
+  if (!start || (end<=start))
+    return -1;

-  slash = strchr (str, '/');
+  slash = memchr (str, '/', end-str);
  if (slash == NULL)
    {
      SIZ(DEN(q)) = 1;
      PTR(DEN(q))[0] = 1;

-      return mpz_set_str (mpq_numref(q), str, base);
+      return mpz_set_str2 (mpq_numref(q), str, end, base);
    }

-  numlen = slash - str;
-  num = __GMP_ALLOCATE_FUNC_TYPE (numlen+1, char);
-  memcpy (num, str, numlen);
-  num[numlen] = '\0';
-  ret = mpz_set_str (mpq_numref(q), num, base);
-  (*__gmp_free_func) (num, numlen+1);
+  ret = mpz_set_str2 (mpq_numref(q), str, slash, base);

  if (ret != 0)
    return ret;

-  return mpz_set_str (mpq_denref(q), slash+1, base);
+  return mpz_set_str2 (mpq_denref(q), slash+1, end, base);
}
diff -r bb072477386f -r 89fd519f1bac mpz/set_str.c
--- a/mpz/set_str.c	Tue Nov 24 15:31:26 2015 +0100
+++ b/mpz/set_str.c	Tue Dec 01 22:05:01 2015 +0100
@@ -44,6 +44,12 @@
int
mpz_set_str (mpz_ptr x, const char *str, int base)
{
+  return (mpz_set_str2(x, str, str+strlen(str), base));
+}
+
+int
+mpz_set_str2 (mpz_ptr x, const char *start, const char* end, int base)
+{
  size_t str_size;
  char *s, *begs;
  size_t i;
@@ -51,7 +57,11 @@
  int c;
  int negative;
  const unsigned char *digit_value;
+  const char* str = start;
  TMP_DECL;
+  
+  if (!start || (end<=start))
+    return -1;

  digit_value = digit_value_tab;
  if (base > 36)
@@ -64,12 +74,15 @@
    }

  /* Skip whitespace.  */
-  do
-    c = (unsigned char) *str++;
-  while (isspace (c));
+  if (str<end)
+  {
+    do
+      c = (unsigned char) *str++;
+    while ((isspace (c)) && (str<end));
+  }

  negative = 0;
-  if (c == '-')
+  if ((c == '-') && (str<end))
    {
      negative = 1;
      c = (unsigned char) *str++;
@@ -80,19 +93,19 @@

  /* If BASE is 0, try to find out the base by looking at the initial
     characters.  */
-  if (base == 0)
+  if ((base == 0) && (str<end))
    {
      base = 10;
      if (c == '0')
	{
	  base = 8;
	  c = (unsigned char) *str++;
-	  if (c == 'x' || c == 'X')
+	  if ((c == 'x' || c == 'X') && (str<end))
	    {
	      base = 16;
	      c = (unsigned char) *str++;
	    }
-	  else if (c == 'b' || c == 'B')
+	  else if ((c == 'b' || c == 'B') && (str<end))
	    {
	      base = 2;
	      c = (unsigned char) *str++;
@@ -101,7 +114,7 @@
    }

  /* Skip leading zeros and white space.  */
-  while (c == '0' || isspace (c))
+  while ((str<end) && (c == '0' || isspace (c)))
    c = (unsigned char) *str++;
  /* Make sure the string does not become empty, mpn_set_str would fail.  */
  if (c == 0)
@@ -111,7 +124,7 @@
    }

  TMP_MARK;
-  str_size = strlen (str - 1);
+  str_size = end-str+1;
  s = begs = (char *) TMP_ALLOC (str_size + 1);

  /* Remove spaces from the string and convert the result from ASCII to a


Regards,

Pierre Chatelier


Le 29 nov. 2015 à 07:26, Marco Bodrato <bodrato at mail.dm.unipi.it> a écrit :

> Ciao,
> 
> Il Ven, 27 Novembre 2015 8:53 pm, Pierre Chatelier ha scritto:
>> Currently the various mpx structures (mpz_t, mpq_t, mpf_t) have an
> 
>> For the sake of performance, I suggest a new prototype
>> mpx_set_str2(mpx_t r, const char *start, const char* end, int base)
>> (where [start;end[ is the string to consider)
> 
> The prototype of the underlying mpn function is
> mp_size_t mpn_set_str (mp_limb_t *rp, const unsigned char *str, size_t
> strsize, int base)
> 
> If you use start:end, you need to explain: *end is included or not?
> 
>> For me, the interest is :
> 
>> 	-It saves a call to strlen() in the current implementation
>> 	-users of gmplib do not need to create intermediate strings with
>> 	'\0' terminator when parsing substring from complex input data
> 
> From mpq/set_str.c:
> /* FIXME: Would like an mpz_set_mem (or similar) accepting a pointer and
> length so we wouldn't have to copy the numerator just to null-terminate
> it.  */
> 
> Regards,
> m
> 
> -- 
> http://bodrato.it/papers/
> 


_______________________________________________
gmp-discuss mailing list
gmp-discuss at gmplib.org
https://gmplib.org/mailman/listinfo/gmp-discuss



More information about the gmp-discuss mailing list