Thanks for including a test program - that's always valuable!
However, I'm going to change it, to parse a battery of test cases instead of reading from stdin:
#include <stdio.h>
/* Test driver: feed a fixed table of inputs to extended_atof() and print
   each input next to the value that was parsed from it. */
int main(void)
{
    static const char *const inputs[] = {
        /* the whole string is a valid number */
        "12",
        "12.0",
        "08",           /* leading zero must not select octal */
        "+12.34",
        ".34",
        "\t \n2.",
        "1e0",
        "1e+0",
        "1e-0",
        "1.e4",
        ".1e-4",
        "-5e006",
        "-5e+16",
        "-.05",
        "-.0",
        "-1e6",
        /* only a leading prefix is a valid number */
        "5c5",
        "10ee5",
        "0x06",         /* "0x" must not select hexadecimal */
        "--1" ,
        "-+1" ,
        "1e--4" ,
        "-1e.4",
        "1e 4",
        "1e-g",
        "", "foobar",   /* no digits at all: both give 0 */
        " e5",          /* likewise 0 */
        "-1e6",
        /* results outside the range of double */
        "1e500000",
        "1e-500000",
        "-1e500000",
        "-1e-500000",
    };
    static const int count = sizeof inputs / sizeof inputs[0];

    int i = 0;
    while (i < count) {
        const char *const text = inputs[i];
        printf("%20s = > %.9g\n", text, extended_atof(text));
        ++i;
    }
}
(I changed the function name to extended_atof() so as to be safely distinct from the standard library atof().)
Your implementation passes all these tests. Now we can look at refactoring.
Remove duplication
The things that we parse in multiple places are:
- optional sign (+ or -)
- digit sequences
So perhaps we can refactor each of those into a function? Instead of using an integer index into the supplied string, I prefer to just move the string pointer, and eliminate the need for i:
/* Consume an optional leading '+' or '-' at `*s`.
   Returns false only when a '-' was consumed; a '+' or no sign at all
   yields true.  `*s` is advanced past the sign character if there was one
   and left untouched otherwise. */
static bool parse_sign(const char **s)
{
    const char first = **s;

    if (first != '-' && first != '+') {
        return true;            /* no explicit sign: positive by default */
    }
    ++*s;
    return first == '+';
}
Let's make use of that in the function:
/* Parse a decimal floating-point number from the start of `s`.
 *
 * Accepted form: optional white space, optional sign, digits with an
 * optional '.' and further digits, then an optional exponent ('e' or 'E',
 * optional sign, digits).  Parsing stops at the first character that does
 * not fit; when no digits are found the result is zero.
 */
double extended_atof(const char *s)
{
    /* Exponent magnitudes are saturated here.  `power` starts at 1 or more
       and is scaled by 10 per step, so after this many steps it is already
       0 or infinity and further steps cannot change the result. */
    enum { MAX_EXPONENT = 1000 };

    /* The <ctype.h> functions need an unsigned char value (or EOF); plain
       char may be negative, so every call goes through a cast. */
    while (isspace((unsigned char)*s)) {
        ++s;
    }

    int sign = parse_sign(&s) ? 1 : -1; /* the sign of the number */

    /* integer digits */
    double value = 0.0;
    while (isdigit((unsigned char)*s)) {
        value = value * 10.0 + (*s++ - '0');
    }

    if (*s == '.') {
        ++s;
    }

    /* fraction digits: keep accumulating into `value` and remember in
       `power` how far the decimal point has to be moved back */
    double power = 1.0;
    while (isdigit((unsigned char)*s)) {
        value = value * 10.0 + (*s++ - '0');
        power *= 10.0;
    }

    /* No exponent: done.  A zero mantissa is also returned here, because
       a large positive exponent would drive `power` to 0 and turn the
       final division into 0/0 (NaN). */
    if (tolower((unsigned char)*s) != 'e' || value == 0.0) {
        return sign * value / power;
    }
    ++s;

    bool powersign = parse_sign(&s);    /* the sign following the E */

    /* The number following the E, accumulated in int arithmetic and
       saturated so that a long digit string cannot overflow. */
    int power2 = 0;
    while (isdigit((unsigned char)*s)) {
        const int digit = *s++ - '0';
        if (power2 < MAX_EXPONENT) {
            power2 = power2 * 10 + digit;
        }
    }

    if (powersign) {
        while (power2 != 0) {
            power /= 10;
            --power2;
        }
    } else {
        while (power2 != 0) {
            power *= 10;
            --power2;
        }
    }
    return sign * value / power;
}
It's slightly shorter, and it still passes all the tests.
Let's see if we can read digit strings in a function, and replace the three places we do that. We'll make it update a count of how many digits were parsed, so we don't lose leading zeros in the fractional part:
/* Parse a decimal floating-point number from the start of `s`.
 *
 * Accepted form: optional white space, optional sign, digits with an
 * optional '.' and further digits, then an optional exponent ('e' or 'E',
 * optional sign, digits).  Parsing stops at the first character that does
 * not fit; when no digits are found the result is zero.
 */
double extended_atof(const char *s)
{
    /* Exponent magnitudes are saturated here.  `power` starts at 1 and is
       scaled by 10 per step, so after this many steps it is already 0 or
       infinity and further steps cannot change the result. */
    enum { MAX_EXPONENT = 1000 };

    /* The <ctype.h> functions need an unsigned char value (or EOF); plain
       char may be negative, so every call goes through a cast. */
    while (isspace((unsigned char)*s)) {
        ++s;
    }

    int sign = parse_sign(&s) ? 1 : -1; /* the sign of the number */
    double value = parse_digits(&s, NULL);

    if (*s == '.') {
        ++s;
        int d;                          /* digits in fraction */
        double fraction = parse_digits(&s, &d);
        /* shift the fraction right by one decimal place per digit read */
        while (d--) {
            fraction /= 10.0;
        }
        value += fraction;
    }
    value *= sign;

    /* No exponent: done.  A zero mantissa is also returned here, because
       a large positive exponent would drive `power` to 0 and turn the
       final division into 0/0 (NaN). */
    if (tolower((unsigned char)*s) != 'e' || value == 0.0) {
        return value;
    }
    ++s;

    bool powersign = parse_sign(&s);    /* the sign following the E */

    /* The number following the E.  parse_digits() returns a double that
       may be far outside the range of int, so clamp before converting. */
    const double digits = parse_digits(&s, NULL);
    int power2 = digits < MAX_EXPONENT ? (int)digits : MAX_EXPONENT;

    double power = 1.0;
    if (powersign) {
        while (power2 != 0) {
            power /= 10;
            --power2;
        }
    } else {
        while (power2 != 0) {
            power *= 10;
            --power2;
        }
    }
    return value / power;
}
Tests still pass; what's next?
/* an 'e' or 'E' introduces the exponent: step past it and carry on */
if (tolower(*s) == 'e') {
++s;
} else {
/* no exponent marker: the value parsed so far is the result */
return value;
}
This can be reversed, and if we're returning, it doesn't matter what we do to s:
/* the post-increment also happens on the returning path, where `s` is
   not looked at again */
if (tolower(*s++) != 'e')
return value;
Here are some near-duplicate blocks:
/* build the scale factor by stepping `power` once per unit of `power2`;
   the two loops differ only in dividing versus multiplying by 10 */
double power = 1.0;
if (powersign) {
while (power2 != 0) {
power /= 10;
--power2;
}
} else {
while (power2 != 0) {
power *= 10;
--power2;
}
}
Dividing by 10 is the same as multiplying by 0.1, so we can move the test into the loop:
double power = 1.0;
while (power2 != 0) {
/* one loop for both signs: the factor is chosen on every pass */
power *= powersign ? 0.1 : 10;
--power2;
}
We could go further, and capture powersign ? 0.1 : 10 into a variable. We can also eliminate the power variable from here, and multiply value directly:
/* per-step factor: 10 when parse_sign() reports positive, 0.1 otherwise */
const double exponentsign = parse_sign(&s) ? 10. : .1;
int exponent = parse_digits(&s, NULL);
/* scale `value` directly, once per unit of the exponent */
while (exponent--)
value *= exponentsign;
Final version
Here's what I finished up with:
#include <ctype.h>
#include <stdbool.h>
#include <stdlib.h>
/* return true for positive, false for negative,
   and advance `*s` to next position */
static bool parse_sign(const char **const s)
{
    switch (**s) {
    case '-': ++*s; return false;
    case '+': ++*s; return true;
    default: return true;
    }
}

/* return decimal value of digits,
   advancing `*s` to the next character,
   and storing the number of digits read into *count
   (`count` may be NULL when the caller does not need it) */
static double parse_digits(const char **const s, int *const count)
{
    double value = 0.0;
    int c = 0;
    /* <ctype.h> functions need an unsigned char value (or EOF); plain char
       may be negative, so cast before the call */
    while (isdigit((unsigned char)**s)) {
        value = value * 10.0 + (*(*s)++ - '0');
        ++c;
    }
    if (count) {
        *count = c;
    }
    return value;
}

/* Parse a decimal floating-point number from the start of `s`.
 *
 * Accepted form: optional white space, optional sign, digits with an
 * optional '.' and further digits, then an optional exponent ('e' or 'E',
 * optional sign, digits).  Parsing stops at the first character that does
 * not fit; when no digits are found the result is zero.  Values too large
 * for double come out as infinity, values too small as zero.
 */
double extended_atof(const char *s)
{
    /* Exponent magnitudes are saturated here: scaling any double by 10 or
       0.1 this many times already ends at infinity or zero (or leaves
       zero/infinity unchanged), so a larger count gives the same result. */
    enum { MAX_EXPONENT = 1000 };

    /* skip white space */
    while (isspace((unsigned char)*s)) {
        ++s;
    }

    const bool valuesign = parse_sign(&s);  /* sign of the number */
    double value = parse_digits(&s, NULL);

    if (*s == '.') {
        int d;                  /* number of digits in fraction */
        ++s;
        double fraction = parse_digits(&s, &d);
        /* shift the fraction right by one decimal place per digit read */
        while (d--) {
            fraction /= 10.0;
        }
        value += fraction;
    }

    if (!valuesign) {
        value = -value;
    }

    /* the post-increment is harmless on the returning path */
    if (tolower((unsigned char)*s++) != 'e') {
        return value;
    }

    /* else, we have an exponent; parse its sign and value */
    const double exponentsign = parse_sign(&s) ? 10. : .1;

    /* parse_digits() returns a double that can be far outside the range
       of int for a long digit string; clamp before converting so the
       conversion is defined and the loop below stays short */
    const double digits = parse_digits(&s, NULL);
    int exponent = digits < MAX_EXPONENT ? (int)digits : MAX_EXPONENT;

    while (exponent--) {
        value *= exponentsign;
    }
    return value;
}
/* Test program */
#include <stdio.h>
/* Test driver: feed a fixed table of inputs to extended_atof() and print
   each input next to the value that was parsed from it. */
int main(void)
{
    static const char *const inputs[] = {
        /* the whole string is a valid number */
        "12",
        "12.0",
        "08",           /* leading zero must not select octal */
        "+12.34",
        ".34",
        "\t \n2.",
        "1e0",
        "1e+0",
        "1e-0",
        "1.e4",
        ".1e-4",
        "-5e006",
        "-5e+16",
        "-.05",
        "-.0",
        "-1e6",
        /* only a leading prefix is a valid number */
        "5c5",
        "10ee5",
        "0x06",         /* "0x" must not select hexadecimal */
        "--1" ,
        "-+1" ,
        "1e--4" ,
        "-1e.4",
        "1e 4",
        "1e-g",
        "", "foobar",   /* no digits at all: both give 0 */
        " e5",          /* likewise 0 */
        "-1e6",
        /* results outside the range of double */
        "1e500000",
        "1e-500000",
        "-1e500000",
        "-1e-500000",
    };
    static const int count = sizeof inputs / sizeof inputs[0];

    int i = 0;
    while (i < count) {
        const char *const text = inputs[i];
        printf("%20s = > %.9g\n", text, extended_atof(text));
        ++i;
    }
}
There's still an opportunity for a small improvement: an extremely long fractional part could overflow double (this problem existed in your original). Instead of returning a large value from parse_digits(), you could consider always returning a fractional value in the range [0...1), and use the number of digits to scale up the integer parts. Then we'd just end up with lost precision at the lower end. That would look like:
/* Read a run of decimal digits at `*s` as a fraction: the first digit
   counts tenths, the second hundredths, and so on, so the result lies in
   [0, 1) up to rounding.  `*s` is advanced past the digits.  When `count`
   is not NULL, the number of digits read is stored in `*count`. */
static double parse_digits(const char **const s, int *const count)
{
    double value = 0.0;
    double increment = 0.1;     /* weight of the next digit */
    int c = 0;
    /* <ctype.h> functions need an unsigned char value (or EOF); plain char
       may be negative, so cast before the call */
    while (isdigit((unsigned char)**s)) {
        value += increment * (*(*s)++ - '0');
        increment /= 10;
        ++c;
    }
    if (count) {
        *count = c;
    }
    return value;
}
The corresponding uses would be:
/* Parse a decimal floating-point number from the start of `s`.
 *
 * Accepted form: optional white space, optional sign, digits with an
 * optional '.' and further digits, then an optional exponent ('e' or 'E',
 * optional sign, digits).  Parsing stops at the first character that does
 * not fit; when no digits are found the result is zero.
 *
 * This variant relies on parse_digits() returning the digits as a
 * fraction, which is then scaled up by the digit count where an integer
 * is wanted.
 */
double extended_atof(const char *s)
{
    /* Exponent magnitudes are saturated here: scaling any double by 10 or
       0.1 this many times already ends at infinity or zero (or leaves
       zero/infinity unchanged), so a larger count gives the same result. */
    enum { MAX_EXPONENT = 1000 };

    /* skip white space; <ctype.h> functions need an unsigned char value
       (or EOF), and plain char may be negative, hence the casts */
    while (isspace((unsigned char)*s)) {
        ++s;
    }

    int d;                                  /* number of digits */
    const bool valuesign = parse_sign(&s);  /* sign of the number */

    /* integer part: fraction scaled up by one decimal place per digit */
    double value = parse_digits(&s, &d);
    while (d--) {
        value *= 10;
    }

    if (*s == '.') {
        ++s;
        double fraction = parse_digits(&s, NULL);
        value += fraction;
    }

    if (!valuesign) {
        value = -value;
    }

    /* the post-increment is harmless on the returning path */
    if (tolower((unsigned char)*s++) != 'e') {
        return value;
    }

    /* else, we have an exponent; parse its sign and value */
    const double exponentsign = parse_sign(&s) ? 10. : .1;
    double exponent_f = parse_digits(&s, &d);
    while (d--) {
        exponent_f *= 10;
    }

    /* exponent_f was built from decimal fractions, so it may sit slightly
       below the intended integer: round to nearest instead of truncating.
       Clamp first so the conversion is in range and the loop is bounded. */
    unsigned long exponent = exponent_f < MAX_EXPONENT
        ? (unsigned long)(exponent_f + 0.5)
        : (unsigned long)MAX_EXPONENT;

    while (exponent-- > 0) {
        value *= exponentsign;
    }
    return value;
}
(Comment fragment) … stdlib.h? So do you need to implement that yourself at all?
(Comment fragment) … strtod is implemented. (or perhaps not, if it is full of platform-dependent trickery!)