JavaScript sscanf
Implements an ANSI C compatible sscanf
function sscanf (str, format) {
  // + original by: Brett Zamir (http://brett-zamir.me)
  // % note 1: Since JS does not support scalar reference variables, any additional arguments to the function will
  // % note 1: only be allowable here as strings referring to a global variable (which will then be set to the value
  // % note 1: found in 'str' corresponding to the appropriate conversion specification in 'format'
  // % note 2: Whitespace in 'format' matches any run of whitespace (possibly empty) in 'str', as in C's scanf
  // * example 1: sscanf('SN/2350001', 'SN/%d');
  // * returns 1: [2350001]
  // * example 2: var myVar; // Will be set by function
  // * example 2: sscanf('SN/2350001', 'SN/%d', 'myVar');
  // * returns 2: 1
  // * example 3: sscanf("10--20", "%2$d--%1$d"); // Must escape '$' in PHP, but not JS
  // * returns 3: [20, 10]
  //
  // Parameters:
  //   str    - the input string to parse
  //   format - the scanf-style format string
  //   ...    - optional names of global variables that receive the parsed values
  // Returns:
  //   with exactly two arguments, an array of parsed values (null for every conversion that could not be matched);
  //   with extra arguments, the number of entries in that array (the values are stored in the named globals).
  // Throws (string values, as in the original implementation):
  //   'Not enough arguments passed to sscanf'
  //   'All groups in sscanf() must be expressed as numeric if any have already been used'
  //   'Missing character after percent mark in sscanf() format argument'
  //   'Unrecognized character after percent mark in sscanf() format argument'

  if (arguments.length < 2) {
    throw 'Not enough arguments passed to sscanf';
  }

  // SETUP
  // Sentinel thrown by _addNext and caught in the main loop to stop scanning at the first input mismatch
  var NO_MATCH = 'No match in string';
  var retArr = [];
  var _NWS = /\S/;
  var args = arguments;
  var that = this;
  var digit; // zero-based target index of the current 'N$' positional conversion (undefined if not positional)
  var width = 0; // maximum field width of the current conversion (0/NaN means unlimited)
  var assign = true; // false while processing a '%*...' (assignment-suppressed) conversion

  var _finish = function () {
    if (args.length === 2) {
      return retArr;
    }
    // 'this' is undefined in strict mode / ES modules, so fall back to the real global object
    var scope = (that && that.window) || (typeof globalThis !== 'undefined' ? globalThis : that);
    for (var k = 0; k < retArr.length; ++k) {
      // Only assign when the caller actually supplied a variable name for this slot
      if (k + 2 < args.length) {
        scope[args[k + 2]] = retArr[k];
      }
    }
    return k;
  };

  var _setExtraConversionSpecs = function (offset) {
    // Since a mismatched character sets us off track from future legitimate finds, we just scan
    // to the end for any other conversion specifications (besides a percent literal), setting them to null.
    // b, F, G give errors in PHP, but 'g', though also disallowed, doesn't
    var matches = format.slice(offset).match(/%[cdeEufgosxX]/g);
    if (matches) {
      var lgth = matches.length;
      while (lgth--) {
        retArr.push(null);
      }
    }
    return _finish();
  };

  // Match 'regex' against the input starting at cursor j (limited to 'width' characters when a width is given),
  // store the converted value unless assignment is suppressed, and return the cursor just past the matched text.
  var _addNext = function (j, regex, cb) {
    var remaining = str.slice(j);
    var check = width ? remaining.slice(0, width) : remaining;
    var match = regex.exec(check);
    var value = match ? (cb ? cb.apply(null, match) : match[0]) : null;
    if (assign) {
      retArr[digit !== undefined ? digit : retArr.length] = value;
    }
    if (value === null) {
      throw NO_MATCH;
    }
    // The regexes are unanchored, so anything skipped before the match has to be consumed too
    return j + match.index + match[0].length;
  };

  // PROCESS
  for (var i = 0, j = 0; i < format.length; i++) {
    width = 0;
    assign = true;
    var fmtChar = format.charAt(i);

    if (fmtChar === '%') {
      if (format.charAt(i + 1) === '%') {
        if (str.charAt(j) === '%') {
          // a matched percent literal
          ++i;
          ++j; // skip beyond duplicated percent
          continue;
        }
        // Format indicated a percent literal, but not actually present
        return _setExtraConversionSpecs(i + 2);
      }

      // CHARACTER FOLLOWING PERCENT IS NOT A PERCENT
      // We need 'g' set to get lastIndex
      var prePattern = new RegExp('^(?:(\\d+)\\$)?(\\*)?(\\d*)([hlL]?)', 'g');
      var preConvs = prePattern.exec(format.slice(i + 1));

      // 'digit' still holds the previous conversion's position; compare against undefined because
      // position '1$' is stored as index 0, which is falsy
      if (digit !== undefined && preConvs[1] === undefined) {
        throw 'All groups in sscanf() must be expressed as numeric if any have already been used';
      }
      digit = preConvs[1] ? parseInt(preConvs[1], 10) - 1 : undefined;

      assign = !preConvs[2];
      width = parseInt(preConvs[3], 10); // NaN (falsy) when no width was given
      // preConvs[4] is the size modifier (h, l or L); it has no effect on the conversion in JavaScript
      i += prePattern.lastIndex;

      // PROCESS CHARACTER
      try {
        switch (format.charAt(i + 1)) {
          // p, S, C arguments in C function not available
          // DOCUMENTED UNDER SSCANF
          case 'F':
            // Not supported in PHP sscanf
            break;
          case 'g':
            // Not supported in PHP sscanf; irrelevant to input conversion
            break;
          case 'G':
            // Not supported in PHP sscanf; irrelevant to input conversion
            break;
          case 'b':
            // Not supported - couldn't distinguish from other integers
            break;
          case 'i':
            // Integer with base detection (Equivalent of 'd', but base 0 instead of 10)
            j = _addNext(j, /([+-])?(?:(?:0[xX]([\da-fA-F]+))|(?:0([0-7]+))|(\d+))/, function (num, sign, hex, oct) {
              return hex ? parseInt(num, 16) : oct ? parseInt(num, 8) : parseInt(num, 10);
            });
            break;
          case 'n':
            // Number of characters processed so far; appended as its own result entry
            if (assign) {
              retArr[digit !== undefined ? digit : retArr.length] = j;
            }
            break;
          // DOCUMENTED UNDER SPRINTF
          case 'c':
            // Get character(s); [\s\S] so that whitespace (including newlines) is read rather than skipped
            j = _addNext(j, new RegExp('[\\s\\S]{1,' + (width || 1) + '}'));
            break;
          case 'D':
            // sscanf documented decimal number; equivalent of 'd'
          case 'd':
            // Optionally signed decimal integer
            j = _addNext(j, /([+-])?(?:0*)(\d+)/, function (num, sign, dec) {
              // Ignores initial zeroes, unlike %i and parseInt()
              var decInt = parseInt((sign || '') + dec, 10);
              // Clamp to the signed 32-bit range, as PHP does
              return Math.min(Math.max(decInt, -2147483648), 2147483647);
            });
            break;
          case 'f':
            // Although sscanf doesn't support locales, this is used instead of '%F'; seems to be same as %e
          case 'E':
            // These don't discriminate here as both allow exponential float of either case
          case 'e':
            // The mantissa must contain at least one digit, so the pattern can never match an empty string;
            // the exponent may carry its own sign
            j = _addNext(j, /([+-])?(?:0*)((?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?)/, function (num, sign, dec) {
              return parseFloat((sign || '') + dec);
            });
            break;
          case 'u':
            // unsigned decimal integer
            j = _addNext(j, /([+-])?(?:0*)(\d+)/, function (num, sign, dec) {
              // Ignores initial zeroes, unlike %i and parseInt()
              var decInt = parseInt(dec, 10);
              if (sign === '-') {
                return 4294967296 - decInt; // wrap negative values into the unsigned 32-bit range
              }
              // PHP also won't allow greater than 4294967295
              return decInt < 4294967295 ? decInt : 4294967295;
            });
            break;
          case 'o':
            // Octal integer; a leading zero is allowed but not required
            j = _addNext(j, /([+-])?([0-7]+)/, function (num) {
              return parseInt(num, 8);
            });
            break;
          case 's':
            // Greedy match of a run of non-whitespace
            j = _addNext(j, /\S+/);
            break;
          case 'X':
            // Same as 'x'
          case 'x':
            // Initial 0x not necessary here
            j = _addNext(j, /([+-])?(?:(?:0[xX])?([\da-fA-F]+))/, function (num) {
              return parseInt(num, 16);
            });
            break;
          case '':
            // If no character left in expression
            throw 'Missing character after percent mark in sscanf() format argument';
          default:
            throw 'Unrecognized character after percent mark in sscanf() format argument';
        }
      } catch (e) {
        if (e === NO_MATCH) {
          // Input did not match: null out the remaining conversions and exit
          return _setExtraConversionSpecs(i + 2);
        }
        // Format errors (and anything unexpected) must reach the caller instead of being swallowed
        throw e;
      }
      ++i; // Calculate skipping beyond initial percent too
    } else if (!_NWS.test(fmtChar)) {
      // Whitespace in the format matches any amount of whitespace in the input, including none
      while (j < str.length && !_NWS.test(str.charAt(j))) {
        j++;
      }
    } else if (fmtChar !== str.charAt(j)) {
      if (_NWS.test(str.charAt(j)) || str.charAt(j) === '') {
        // A literal mismatch (or end of input) ends the scan
        return _setExtraConversionSpecs(i + 1);
      }
      // Skip stray whitespace in the input and retry the same format character
      j++;
      i--;
    } else {
      j++;
    }
  }

  // POST-PROCESSING
  return _finish();
}
Examples
» Example 1
Running
sscanf('SN/2350001', 'SN/%d');
Should return
[2350001]
» Example 2
Running
var myVar; // Will be set by function
sscanf('SN/2350001', 'SN/%d', 'myVar');
Should return
1
Dependencies
No dependencies, you can use this function standalone.
Open syntax issues
php.js uses JsLint to help us keep our code consistent and prevent some common bugs.
Eventually we want all code to pass or at least take into consideration most fixes suggested by JsLint, following this JsLint configuration we’ve decided on.
Authors
Thanks to the following developers, you get to have sscanf goodness in JavaScript.
No comments yet. Be the first!
spread the word:
Use any PHP function in JavaScript
These kind folks have already donated: AYHAN BARI*, Nikita Ekshiyan, Nikita Ekshiyan, Petr Pavel, @HalfWinter, Paulo Freitas, Andros Peña Romo, @andorosu, Raimund Szabo, Nitin Gupta, @nikosdion, Anonymous, Anonymous and Shawn Houser.
<your name here>