Use PHP functions in JavaScript

JavaScript substr

Returns part of a string

1
2
3
4
56
7
8
9
1011
12
13
14
1516
17
18
19
2021
22
23
24
2526
27
28
29
3031
32
33
34
3536
37
38
39
4041
42
43
44
4546
47
48
49
5051
52
53
54
5556
57
58
59
6061
62
63
64
6566
67
68
69
7071
72
73
74
7576
77
78
79
8081
82
83
84
8586
87
88
89
9091
92
93
94
9596
97
98
99
100101
102
103
104
105106
107
108
109
110111
112
113
114
115116
117
118
function substr (str, start, len) {
    // Returns part of a string
    //
    // version: 909.322
    // discuss at: http://phpjs.org/functions/substr
    // +     original by: Martijn Wieringa
    // +     bugfixed by: T.Wild
    // +      tweaked by: Onno Marsman
    // +      revised by: Theriault
    // +      improved by: Brett Zamir (http://brett-zamir.me)
    // %    note 1: Handles rare Unicode characters if 'unicode.semantics' ini (PHP6) is set to 'on'
    // %    note 2: The ini setting is read from this.php_js.ini; when the function is called
    // %    note 2: without a usable `this` (strict mode, ES modules) Unicode semantics are off
    // *       example 1: substr('abcdef', 0, -1);
    // *       returns 1: 'abcde'
    // *       example 2: substr(2, 0, -6);
    // *       returns 2: false
    // *       example 3: ini_set('unicode.semantics',  'on');
    // *       example 3: substr('a\uD801\uDC00', 0, -1);
    // *       returns 3: 'a'
    // *       example 4: ini_set('unicode.semantics',  'on');
    // *       example 4: substr('a\uD801\uDC00', 0, 2);
    // *       returns 4: 'a\uD801\uDC00'
    // *       example 5: ini_set('unicode.semantics',  'on');
    // *       example 5: substr('a\uD801\uDC00', -1, 1);
    // *       returns 5: '\uD801\uDC00'
    // *       example 6: ini_set('unicode.semantics',  'on');
    // *       example 6: substr('a\uD801\uDC00z\uD801\uDC00', -3, 2);
    // *       returns 6: '\uD801\uDC00z'
    // *       example 7: ini_set('unicode.semantics',  'on');
    // *       example 7: substr('a\uD801\uDC00z\uD801\uDC00', -3, -1)
    // *       returns 7: '\uD801\uDC00z'
    // *       example 8: ini_set('unicode.semantics',  'on');
    // *       example 8: substr('a\uD801\uDC00z', 1);
    // *       returns 8: '\uD801\uDC00z'
    //
    // Parameters:
    //   str   - value to cut; always coerced to a string first (so numbers work).
    //   start - offset of the first character; a negative value counts from the end.
    //   len   - optional. Positive: maximum number of characters to return.
    //           Negative: number of characters to leave off the end.
    //           Omitted (undefined): everything up to the end of the string.
    // Returns:
    //   The requested portion (possibly ''), or false when start falls outside the
    //   string or the calculated end comes before the calculated start.
    //
    // Add: (?) Use unicode.runtime_encoding (e.g., with string wrapped in "binary" or "Binary" class) to
    // allow access of binary (see file_get_contents()) by: charCodeAt(x) & 0xFF
    // (see https://developer.mozilla.org/En/Using_XMLHttpRequest ) or require conversion first?

    str += '';

    // `this` is undefined when called as a plain function in strict mode / ES modules;
    // fall back to a throw-away object so the ini lookup below cannot throw.
    var host = (this !== null && (typeof this === 'object' || typeof this === 'function')) ? this : {};

    // BEGIN REDUNDANT
    host.php_js = host.php_js || {};
    host.php_js.ini = host.php_js.ini || {};
    // END REDUNDANT

    // Anything other than 'on' (including 'off' and an unset ini) means plain UTF-16 code-unit semantics.
    var setting = host.php_js.ini['unicode.semantics'];
    var unicodeOn = !!setting && String(setting.local_value).toLowerCase() === 'on';

    // `units` is what gets counted and sliced: the raw string normally, or an array of
    // whole characters (surrogate pairs kept together) when Unicode semantics are on and
    // the string really contains a non-BMP character.
    var units = str;
    if (unicodeOn && /[\uD800-\uDBFF][\uDC00-\uDFFF]/.test(str)) {
        units = str.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]|[\s\S]/g);
    }

    var count = units.length;
    var from = start < 0 ? start + count : start;
    var to = typeof len === 'undefined' ? count : (len < 0 ? len + count : len + from);

    // PHP returns false if start does not fall within the string.
    // PHP returns false if the calculated end comes before the calculated start.
    // PHP returns an empty string if start and end are the same.
    // Otherwise, PHP returns the portion of the string from start to end.
    if (from >= count || from < 0 || from > to) {
        return false;
    }

    var piece = units.slice(from, to);
    return typeof piece === 'string' ? piece : piece.join('');
}
external links: original PHP docs | raw js source

Examples

» Example 1

Running

1
substr('abcdef', 0, -1);

Should return

1
'abcde'

» Example 2

Running

1
substr(2, 0, -6);

Should return

1
false

Dependencies

No dependencies, you can use this function standalone.

Open syntax issues

php.js uses JsLint to help us keep our code consistent and prevent some common bugs.

Eventually we want all code to pass or at least take into consideration most fixes suggested by JsLint, following this JsLint configuration we’ve decided on.


Authors

Thanks to the following developers, you get to have substr goodness in JavaScript.

Comments

Add Comment
Use:
[CODE]
your_stuff('here');
[/CODE]
for proper code formatting
By submitting code here you are allowing us to use it in php.js hence dual licensing it under the MIT and GPL licenses

Gravatar
Brett Zamir
May 28th Permalink

q  @dazz: By the way, in addition to my comments just now, be aware that if you try to use some of these 4-byte characters on the web, they could actually cause problems for sites. For example, MySQL will not even store these unless the field is defined as binary. Some sites like Wikipedia are geared to support these characters (and our site because we prepared for it), but they are not yet universally supported in databases, server-side languages (PHP, like JS, can handle them, but does not treat them properly until PHP6), nor text editors, etc. (even some regular 2-byte characters are not displayable in some environments though probably usually due to font support issues, since even if the technology supports Unicode, the font must also support it in order to display it; if you agree that fonts would be ideally auto-downloaded in order to give automatic full coverage of any character without developers needing to use CSS' @font-face property ( https://developer.mozilla.org/index.php?title=En/CSS/%40font-face ), automatically providing fonts to the browser when the user visits a page requiring support of certain characters, add your vote to the bug report at https://bugzilla.mozilla.org/show_bug.cgi?id=512619 ).

Gravatar
Brett Zamir
May 28th Permalink

q  @dazz: Most of the code length (which is code which is only run if you type

ini_set('unicode.semantics',  'on');

before running substr) is for those who need to support Unicode 100%.

Unicode was invented to allow one to display any written character from any language on the same page at the same time, and JavaScript supports this. In the past, one could only use one or a few language scripts on the same page (e.g., ASCII for English, Latin-1 for European languages, etc.).

Each character in Unicode is represented abstractly by a number, so even if you can't see it (or if the current font doesn't support it), we can talk about a given character.

However, given all of the other written scripts being used in Unicode (i.e., all living scripts and even for many dead written languages), there were so many that once the more common characters of human languages were assigned to slots representable by a single character in JavaScript (2 bytes), there weren't any more slots left, so for some rarely used characters (like some ancient Chinese characters) it became necessary to represent them by 2 reserved characters (4 bytes) combined together (individually called "surrogates", the first in the pair being a "high surrogate" and the second a "low surrogate") which are not display characters by themselves but as far as memory and JavaScript functions are normally concerned (e.g., string length), they are like 2 full characters.

For example, try this in JavaScript (I recommend "Extension Developer's Extension" for Firefox, so you don't need to save a file but can just test JavaScript immediately from the toolbar; I like its XUL Editor for this as it evaluates the JavaScript immediately):

alert('𠀀'.length);



Although the above is only one character as far as appearance and cutting-and-pasting, the code will actually alert "2"!

And if you use

alert('𠀀'.charAt(0));

to find out what the first "character" inside that character is (or

alert('𠀀'.charAt(1));

to find out what the second one is), you'll see a weird number displayed in a box (at least in Firefox), which means the character is not supported by your font; in this case that is because the "character" is not a real character at all, only half of a surrogate pair, so no font is going to display it as a character anyway.

So, substr (as it will be in international-friendly PHP6), when in Unicode mode (set by ini_set()) should not mistreat these in a count as though they were 2 characters (it should treat the surrogate pairs as though a single whole character), nor should it potentially cut these characters in half leading to a non-readable character being left over. Hope that helps explain it a little...

Not all of our functions are Unicode friendly in this way (and probably few if any other JavaScript libraries even take this into account in any of their functions) but we should support this for users who wish to use it (especially given that PHP6 is potentially supporting such characters).

@others: Sorry, I hope to get to your comments/patches soon... Been busy with work deadlines...

Gravatar
dazz
May 27th Permalink

q  wtF?? This Function is veryyy looooong

Gravatar
Val Che
Apr 5th Permalink

q  This one worked fine for me too :-) Thanks guys

Gravatar
Kevin van Zonneveld
5 Sep '08 Permalink

q  @ T.Wild: Sure does, thank you for your bugfix. I've also added a testcase for this situation so it cannot happen again.

Gravatar
T.Wild
3 Sep '08 Permalink

q  Had a bit of difficulty when this function was passed an integer instead of a string since [CODE="javascript"].substring[/CODE] isn't an integer function.

I fixed the error thusly:
[CODE="javascript"]
return (""+f_string).substring(f_start, f_length);
[/CODE]
instead of:
[CODE="javascript"]
return f_string.substring(f_start, f_length);
[/CODE]
forcing f_string to be a string, since this is how the PHP version treats the first parameter.
[CODE="php"]
substr(1234,2,1) = 3;
[/CODE]

hope this helps.


Contribute a New function