Use PHP functions in JavaScript

JavaScript sscanf

Implements an ANSI C compatible sscanf

1
2
3
4
56
7
8
9
1011
12
13
14
1516
17
18
19
2021
22
23
24
2526
27
28
29
3031
32
33
34
3536
37
38
39
4041
42
43
44
4546
47
48
49
5051
52
53
54
5556
57
58
59
6061
62
63
64
6566
67
68
69
7071
72
73
74
7576
77
78
79
8081
82
83
84
8586
87
88
89
9091
92
93
94
9596
97
98
99
100101
102
103
104
105106
107
108
109
110111
112
113
114
115116
117
118
119
120121
122
123
124
125126
127
128
129
130131
132
133
134
135136
137
138
139
140141
142
143
144
145146
147
148
149
150151
152
153
154
155156
157
158
159
160161
162
163
164
165166
167
168
169
170171
172
173
174
175176
177
178
179
180181
182
183
184
185186
187
188
189
190191
192
193
194
195196
197
198
199
200201
202
203
204
205206
207
208
209
210211
212
213
214
215216
217
218
219
220221
222
223
224
225226
227
228
229
230231
232
233
234
235236
237
238
239
240241
/**
 * Parses `str` according to a scanf-style `format` (JavaScript port of PHP's sscanf()).
 *
 * Supported conversions: %d/%D, %i, %u, %o, %x/%X, %e/%E/%f, %s, %c, %n and the literal %%.
 * Each conversion may carry a positional prefix ("%2$d"), an assignment-suppression
 * star ("%*d"), a maximum field width ("%4d") and a size modifier (h, l, L - parsed
 * but ignored). %F, %g, %G and %b are accepted but neither consume input nor store a value.
 *
 * @param {string} str    Input text to scan.
 * @param {string} format Scanf-style format string.
 * @param {...string} [names] Optional names of global variables; when given, the parsed
 *        values are written to those globals instead of being returned as an array.
 * @returns {Array|number} With exactly two arguments: the array of parsed values, holding
 *        `null` for every conversion that could not be matched. With extra arguments: the
 *        number of entries in that array.
 * @throws {string} 'Not enough arguments passed to sscanf' when fewer than two arguments
 *        are given; a descriptive string for a malformed format (missing or unrecognized
 *        conversion character, positional and non-positional conversions mixed).
 */
function sscanf (str, format) {
    // +   original by: Brett Zamir (http://brett-zamir.me)
    // %        note 1: Since JS does not support scalar reference variables, any additional arguments to the function will
    // %        note 1: only be allowable here as strings referring to a global variable (which will then be set to the value
    // %        note 1: found in 'str' corresponding to the appropriate conversion specification in 'format'
    // %        note 2: Whitespace in 'format' matches any amount of whitespace (including none) in 'str'; numeric and
    // %        note 2: %s conversions skip leading whitespace, %c does not
    // *     example 1: sscanf('SN/2350001', 'SN/%d');
    // *     returns 1: [2350001]
    // *     example 2: var myVar; // Will be set by function
    // *     example 2: sscanf('SN/2350001', 'SN/%d', 'myVar');
    // *     returns 2: 1
    // *     example 3: sscanf("10--20", "%2$d--%1$d"); // Must escape '$' in PHP, but not JS
    // *     returns 3: [20, 10]

    // SETUP
    var NO_MATCH = 'No match in string'; // internal control-flow signal; never escapes this function
    var WHITESPACE = /\s/;
    var NON_WHITESPACE = /\S/;
    // Optional parts between '%' and the conversion character: position, '*', width, size modifier
    var PREFIX = /^(?:(\d+)\$)?(\*)?(\d*)([hlL]?)/;
    var retArr = [],
        args = arguments,
        that = this,
        digit, // zero-based target index of the current "%n$" conversion, otherwise undefined
        width = 0, // maximum field width of the current conversion (0 = unlimited)
        assign = true, // false while processing a "%*" (suppressed) conversion
        i, j;

    // Returns the result in the form the caller asked for (array, or count plus global assignment).
    var _finish = function () {
        var k, scope;
        if (args.length === 2) {
            return retArr;
        }
        // `this.window` only exists for sloppy-mode calls in a browser; fall back to the real global object
        scope = (that && that.window) ||
            (typeof window !== 'undefined' ? window : (typeof globalThis !== 'undefined' ? globalThis : null));
        for (k = 0; k < retArr.length; ++k) {
            if (k + 2 < args.length) { // never create a global literally named "undefined"
                scope[args[k + 2]] = retArr[k];
            }
        }
        return k;
    };

    // Since a mismatch sets us off track from any future legitimate finds, scan the rest of the
    // format (from `offset`) and record null for every remaining assigning conversion, then finish.
    var _setExtraConversionSpecs = function (offset) {
        // "%%" is consumed as a unit so that the 'd' in "%%d" is not mistaken for a conversion
        var specPattern = /%(?:%|(?:(\d+)\$)?(\*)?\d*[hlL]?[cdDeEfgiosuxX])/g;
        var rest = format.slice(offset);
        var spec, slot;
        while ((spec = specPattern.exec(rest)) !== null) {
            if (spec[0] === '%%' || spec[2]) {
                continue; // percent literals and suppressed conversions produce no value
            }
            if (spec[1] !== undefined) {
                slot = parseInt(spec[1], 10) - 1;
                if (retArr[slot] === undefined) {
                    retArr[slot] = null;
                }
            } else {
                retArr.push(null);
            }
        }
        return _finish();
    };

    // Matches `regex` (which must be anchored with ^) at position `pos` of `str`, honouring the
    // current `width`, stores the (optionally converted) value unless assignment is suppressed,
    // and returns the position just past the consumed text. Throws NO_MATCH when nothing matches.
    var _addNext = function (pos, regex, cb, keepLeadingWhitespace) {
        var remaining = str.slice(pos);
        var skipped = keepLeadingWhitespace ? 0 : remaining.length - remaining.replace(/^\s+/, '').length;
        var candidate = remaining.slice(skipped);
        var match, value;
        if (width) {
            candidate = candidate.slice(0, width);
        }
        match = regex.exec(candidate);
        value = match ? (cb ? cb.apply(null, match) : match[0]) : null;
        if (assign) {
            retArr[digit !== undefined ? digit : retArr.length] = value;
        }
        if (value === null) {
            throw NO_MATCH;
        }
        return pos + skipped + match[0].length;
    };

    if (arguments.length < 2) {
        throw 'Not enough arguments passed to sscanf';
    }

    // PROCESS
    for (i = 0, j = 0; i < format.length; i++) {
        var formatChar = format.charAt(i);
        width = 0;
        assign = true;

        if (formatChar === '%') {
            if (format.charAt(i + 1) === '%') {
                if (str.charAt(j) === '%') { // a matched percent literal
                    ++i; // skip beyond duplicated percent
                    ++j;
                    continue;
                }
                // Format indicated a percent literal, but not actually present
                return _setExtraConversionSpecs(i + 2);
            }

            // CHARACTER FOLLOWING PERCENT IS NOT A PERCENT
            var preConvs = PREFIX.exec(format.slice(i + 1)); // always matches; every part is optional

            // `digit` still holds the previous conversion's position (0 is a valid position)
            if (digit !== undefined && preConvs[1] === undefined) {
                throw 'All groups in sscanf() must be expressed as numeric if any have already been used';
            }
            digit = preConvs[1] ? parseInt(preConvs[1], 10) - 1 : undefined;
            assign = !preConvs[2];
            width = preConvs[3] ? parseInt(preConvs[3], 10) : 0;
            // preConvs[4] (size modifier h, l or L) has no effect on JavaScript numbers and is ignored
            i += preConvs[0].length; // i now sits just before the conversion character

            // PROCESS CHARACTER
            try {
                switch (format.charAt(i + 1)) {
                    // p, S, C arguments in C function not available
                case 'F':
                    // Not supported in PHP sscanf
                case 'g':
                    // Not supported in PHP sscanf; shorter of %e and %f
                case 'G':
                    // Not supported in PHP sscanf; shorter of %E and %f
                case 'b':
                    // Not supported - couldn't distinguish from other integers
                    break;
                case 'i':
                    // Integer with base detection (0x prefix = hex, leading 0 = octal, otherwise decimal)
                    j = _addNext(j, /^[+-]?(?:0[xX]([\da-fA-F]+)|0([0-7]+)|(\d+))/, function (num, hex, oct) {
                        return hex ? parseInt(num, 16) : oct ? parseInt(num, 8) : parseInt(num, 10);
                    });
                    break;
                case 'n':
                    // Number of characters processed so far; consumes no input
                    if (assign) {
                        retArr[digit !== undefined ? digit : retArr.length] = j;
                    }
                    break;
                case 'c':
                    // Get character(s); does NOT skip leading whitespace and accepts any character
                    j = _addNext(j, new RegExp('^[\\s\\S]{1,' + (width || 1) + '}'), null, true);
                    break;
                case 'D':
                    // sscanf documented decimal number; equivalent of 'd'
                case 'd':
                    // Optionally signed decimal integer, clamped to the signed 32-bit range
                    j = _addNext(j, /^[+-]?\d+/, function (num) {
                        var decInt = parseInt(num, 10); // base 10 ignores initial zeroes
                        if (decInt < 0) {
                            return decInt < -2147483648 ? -2147483648 : decInt;
                        }
                        return decInt < 2147483647 ? decInt : 2147483647;
                    });
                    break;
                case 'f':
                case 'E':
                case 'e':
                    // These don't discriminate here as all allow an exponential float of either case
                    j = _addNext(j, /^[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?/, function (num) {
                        return parseFloat(num);
                    });
                    break;
                case 'u':
                    // Unsigned decimal integer; a negative input wraps around like a 32-bit unsigned value
                    j = _addNext(j, /^([+-])?(\d+)/, function (num, sign, dec) {
                        var decInt = parseInt(dec, 10);
                        if (sign === '-') {
                            return 4294967296 - decInt;
                        }
                        return decInt < 4294967295 ? decInt : 4294967295;
                    });
                    break;
                case 'o':
                    // Octal integer (a leading zero is allowed but not required)
                    j = _addNext(j, /^[+-]?[0-7]+/, function (num) {
                        return parseInt(num, 8);
                    });
                    break;
                case 's':
                    // Greedy run of non-whitespace characters
                    j = _addNext(j, /^\S+/);
                    break;
                case 'X':
                case 'x':
                    // Hexadecimal integer; initial 0x not necessary here
                    j = _addNext(j, /^[+-]?(?:0[xX])?[\da-fA-F]+/, function (num) {
                        return parseInt(num, 16);
                    });
                    break;
                case '':
                    // If no character left in expression
                    throw 'Missing character after percent mark in sscanf() format argument';
                default:
                    throw 'Unrecognized character after percent mark in sscanf() format argument';
                }
            } catch (e) {
                if (e === NO_MATCH) { // Allow us to exit
                    return _setExtraConversionSpecs(i + 2);
                }
                throw e; // format errors must reach the caller instead of being swallowed
            }
            ++i; // Calculate skipping beyond initial percent too
        } else if (WHITESPACE.test(formatChar)) {
            // Whitespace in the format matches any amount of whitespace in the input, including none
            while (WHITESPACE.test(str.charAt(j))) {
                j++;
            }
        } else if (formatChar !== str.charAt(j)) {
            if (NON_WHITESPACE.test(str.charAt(j)) || str.charAt(j) === '') {
                // A genuine literal mismatch (or end of input)
                return _setExtraConversionSpecs(i + 1);
            }
            // Stray whitespace in the input: step over it and retry the same format character
            j++;
            i--;
        } else {
            j++;
        }
    }

    // POST-PROCESSING
    return _finish();
}
external links: original PHP docs | raw js source

Examples

» Example 1

Running

1
sscanf('SN/2350001', 'SN/%d');

Should return

1
[2350001]

» Example 2

Running

1
2
var myVar; // Will be set by function
sscanf('SN/2350001', 'SN/%d', 'myVar');

Should return

1
1

Dependencies

No dependencies, you can use this function standalone.

Open syntax issues

php.js uses JsLint to help us keep our code consistent and prevent some common bugs.

Eventually we want all code to pass JsLint, or at least to take most of its suggested fixes into consideration, following the JsLint configuration we’ve decided on.


Authors

Thanks to the following developers, you get to have sscanf goodness in JavaScript.

Comments

Add Comment
Use:
[CODE]
your_stuff('here');
[/CODE]
for proper code formatting
By submitting code here, you allow us to use it in php.js and therefore to dual-license it under the MIT and GPL licenses.

No comments yet. Be the first!


Contribute a New function