JavaScript file_get_contents
Read the entire file into a string
function file_get_contents (url, flags, context, offset, maxLen) {
    // Read the entire file into a string
    //
    // version: 906.111
    // discuss at: http://phpjs.org/functions/file_get_contents
    // +   original by: Legaev Andrey
    // +      input by: Jani Hartikainen
    // +   improved by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
    // +   improved by: Brett Zamir (http://brett-zamir.me)
    // +      input by: Raphael (Ao) RUDLER
    // +   bugfixed by: Brett Zamir (http://brett-zamir.me)
    // %        note 1: This function uses XmlHttpRequest and cannot retrieve resource from different domain without modifications.
    // %        note 2: Synchronous by default (as in PHP) so may lock up browser. Can
    // %        note 2: get async by setting a custom "phpjs.async" property to true and "notification" for an
    // %        note 2: optional callback (both as context params, with responseText, and other JS-specific
    // %        note 2: request properties available via 'this'). Note that file_get_contents() will not return the text
    // %        note 2: in such a case (use this.responseText within the callback). Or, consider using
    // %        note 2: jQuery's: $('#divId').load('http://url') instead.
    // %        note 3: The context argument is only implemented for http, and only partially (see below for
    // %        note 3: "Presently unimplemented HTTP context options"); also the arguments passed to
    // %        note 3: notification are incomplete
    // *     example 1: file_get_contents('http://kevin.vanzonneveld.net/pj_test_supportfile_1.htm');
    // *     returns 1: '123'
    //
    // Arguments:
    //   url     - absolute http(s)/file URL, or a path resolved against this.window.location.href
    //   flags   - bitmask, flag name, or array of flag names (FILE_USE_INCLUDE_PATH, FILE_TEXT, FILE_BINARY)
    //   context - optional stream context; only context.stream_options.http is honoured
    //   offset  - optional start position within the response text
    //   maxLen  - optional maximum number of characters to return
    // Returns the response text, or false when a context without http options is supplied.
    // Throws an Error when FILE_BINARY and FILE_TEXT are combined.
    // Side effects: stores the response header lines in this.$http_response_header
    // (see http://php.net/manual/en/reserved.variables.httpresponseheader.php).
    var i = 0, href = '', pathPos = -1, flagNames = 0, content = null,
        http_options = null, http_stream = false;
    var hasOwn = Object.prototype.hasOwnProperty;

    // BEGIN REDUNDANT
    this.php_js = this.php_js || {};
    this.php_js.ini = this.php_js.ini || {};
    // END REDUNDANT
    var ini = this.php_js.ini;
    context = context || this.php_js.default_streams_context || null;

    var OPTS = {
        FILE_USE_INCLUDE_PATH : 1,
        FILE_TEXT : 32,
        FILE_BINARY : 64
    };
    if (!flags) {
        flags = 0;
    }
    if (typeof flags === 'number') {
        flagNames = flags;
    } else {
        // Allow for a single string or an array of string flags
        flags = [].concat(flags);
        for (i = 0; i < flags.length; i++) {
            if (hasOwn.call(OPTS, flags[i])) {
                flagNames = flagNames | OPTS[flags[i]];
            }
        }
    }

    if ((flagNames & OPTS.FILE_BINARY) && (flagNames & OPTS.FILE_TEXT)) {
        // These flags shouldn't be together
        throw new Error('You cannot pass both FILE_BINARY and FILE_TEXT to file_get_contents()');
    }

    if ((flagNames & OPTS.FILE_USE_INCLUDE_PATH) && ini.include_path && ini.include_path.local_value) {
        var slash = ini.include_path.local_value.indexOf('/') !== -1 ? '/' : '\\';
        url = ini.include_path.local_value + slash + url;
    } else if (!/^(https?|file):/.test(url)) {
        // Allow references within or below the same directory (should fix to allow other relative
        // references or root reference; could make dependent on parse_url())
        href = this.window.location.href;
        pathPos = url.indexOf('/') === 0 ? href.indexOf('/', 8) - 1 : href.lastIndexOf('/');
        url = href.slice(0, pathPos + 1) + url;
    }

    if (context) {
        http_options = (context.stream_options && context.stream_options.http) || null;
        http_stream = !!http_options;
    }
    if (context && !http_stream) {
        // Only http stream contexts are implemented
        return false;
    }

    var req = (this.window && this.window.ActiveXObject) ? new ActiveXObject('Microsoft.XMLHTTP') : new XMLHttpRequest();
    if (!req) {
        throw new Error('XMLHttpRequest not supported');
    }

    // PHP defaults to GET when the http context does not name a method
    var method = (http_stream && http_options.method) || 'GET';
    var async = !!(context && context.stream_params && context.stream_params['phpjs.async']);

    if (ini['phpjs.ajaxBypassCache'] && ini['phpjs.ajaxBypassCache'].local_value) {
        // Give optional means of forcing bypass of cache
        url += (url.indexOf('?') === -1 ? '?' : '&') + (new Date()).getTime();
    }

    req.open(method, url, async);

    if (async) {
        var notification = context.stream_params.notification;
        if (typeof notification === 'function') {
            // Fix: make work with req.addEventListener if available (progress/load/error/abort):
            // https://developer.mozilla.org/En/Using_XMLHttpRequest
            req.onreadystatechange = function (aEvt) {
                // aEvt has stopPropagation(), preventDefault(); see https://developer.mozilla.org/en/NsIDOMEvent
                // Other XMLHttpRequest properties: multipart, responseXML, status, statusText, upload, withCredentials
                //
                // PHP notification codes: 1 RESOLVE, 2 CONNECT, 3 AUTH_REQUIRED, 4 MIME_TYPE_IS,
                // 5 FILE_SIZE_IS, 6 REDIRECTED, 7 PROGRESS, 8 COMPLETED, 9 FAILURE, 10 AUTH_RESULT
                // PHP severities: 0 INFO, 1 WARN, 2 ERR
                //
                // These properties are not available in PHP, but offered on notification via 'this' for convenience
                var objContext = {
                    responseText : req.responseText,
                    responseXML : req.responseXML,
                    status : req.status,
                    statusText : req.statusText,
                    readyState : req.readyState,
                    evt : aEvt
                };
                // notification args: notification_code, severity, message, message_code, bytes_transferred,
                // bytes_max (all int's except string 'message'). Need to add message, etc.
                var bytes_transferred;
                switch (req.readyState) {
                    case 0: // UNINITIALIZED open() has not been called yet.
                    case 1: // LOADING send() has not been called yet.
                    case 2: // LOADED send() has been called, and headers and status are available.
                        notification.call(objContext, 0, 0, '', 0, 0, 0);
                        break;
                    case 3: // INTERACTIVE Downloading; responseText holds partial data.
                        bytes_transferred = req.responseText.length * 2; // One character is two bytes
                        notification.call(objContext, 7, 0, '', 0, bytes_transferred, 0);
                        break;
                    case 4: // COMPLETED The operation is complete.
                        if (req.status >= 200 && req.status < 400) {
                            bytes_transferred = req.responseText.length * 2; // One character is two bytes
                            notification.call(objContext, 8, 0, '', req.status, bytes_transferred, 0);
                        } else if (req.status === 403) {
                            // Fix: These two are finished except for message
                            notification.call(objContext, 10, 2, '', req.status, 0, 0);
                        } else { // Errors
                            notification.call(objContext, 9, 2, '', req.status, 0, 0);
                        }
                        break;
                    default:
                        throw new Error('Unrecognized ready state for file_get_contents()');
                }
            };
        }
    }

    if (http_stream) {
        // The header option is optional; a missing value must not break the loop below
        var sendHeaders = http_options.header ? http_options.header.split(/\r?\n/) : [];
        var userAgentSent = false;
        for (i = 0; i < sendHeaders.length; i++) {
            var sendHeader = sendHeaders[i];
            var breakPos = sendHeader.indexOf(':');
            if (breakPos < 1) {
                // Blank line (e.g. trailing CRLF) or no header name: nothing valid to send
                continue;
            }
            var sendHeaderName = sendHeader.substring(0, breakPos);
            req.setRequestHeader(sendHeaderName, sendHeader.substring(breakPos + 1).replace(/^\s+/, ''));
            if (sendHeaderName.toLowerCase() === 'user-agent') {
                userAgentSent = true;
            }
        }
        if (!userAgentSent) {
            var user_agent = http_options.user_agent || (ini.user_agent && ini.user_agent.local_value);
            if (user_agent) {
                req.setRequestHeader('User-Agent', user_agent);
            }
        }
        content = http_options.content || null;
        // Presently unimplemented HTTP context options:
        //   request_fulluri  (default false) - use the entire URI when constructing the request
        //   max_redirects    (default 20)    - max number of redirects to follow
        //   protocol_version (default 1.0)   - HTTP protocol version
        //   timeout          (default ini default_socket_timeout) - read timeout in seconds
        //   ignore_errors    (default false) - fetch the content even on failure status codes
    }

    // overrideMimeType() is not available in every XMLHttpRequest implementation
    var canOverrideMime = typeof req.overrideMimeType === 'function';
    if (flagNames & OPTS.FILE_TEXT) {
        // Overrides how encoding is treated (regardless of what is returned from the server)
        var content_type = 'text/html';
        if (http_options && http_options['phpjs.override']) { // Fix: Could allow for non-HTTP as well
            // We use this, e.g., in gettext-related functions if character set
            // overridden earlier by bind_textdomain_codeset()
            content_type = http_options['phpjs.override'];
        } else {
            var encoding = (ini['unicode.stream_encoding'] && ini['unicode.stream_encoding'].local_value) || 'UTF-8';
            if (http_options && http_options.header && (/^content-type:/im).test(http_options.header)) {
                // We'll assume a content-type expects its own specified encoding if present
                content_type = http_options.header.match(/^content-type:\s*(.*)$/im)[1]; // We let any header encoding stand
            }
            if (!(/;\s*charset=/).test(content_type)) { // If no encoding
                content_type += '; charset=' + encoding;
            }
        }
        if (canOverrideMime) {
            req.overrideMimeType(content_type);
        }
    } else if (flagNames & OPTS.FILE_BINARY) {
        // Default is FILE_BINARY, but for binary, we apparently deviate from PHP in requiring the flag, since many
        // if not most people will also want a way to have it be auto-converted into native JavaScript text instead.
        // Trick at https://developer.mozilla.org/En/Using_XMLHttpRequest to get binary.
        // Getting an individual byte then requires: responseText.charCodeAt(x) & 0xFF
        // (throw away high-order byte (f7)) where x is 0 to responseText.length-1
        if (canOverrideMime) {
            req.overrideMimeType('text/plain; charset=x-user-defined');
        }
    }

    if (http_options && http_options['phpjs.sendAsBinary'] && typeof req.sendAsBinary === 'function') {
        // For content sent in a POST or PUT request (use with file_put_contents()?)
        req.sendAsBinary(content); // Firefox-only, and only in some versions
    } else {
        req.send(content);
    }

    var rawHeaders = req.getAllResponseHeaders();
    if (rawHeaders) {
        var headerLines = rawHeaders.split(/\r?\n/);
        var headers = [];
        for (i = 0; i < headerLines.length; i++) {
            if (headerLines[i] !== '') {
                headers.push(headerLines[i]);
            }
        }
        this.$http_response_header = headers;
    }

    if (offset || maxLen) {
        if (maxLen) {
            return req.responseText.substr(offset || 0, maxLen);
        }
        return req.responseText.substr(offset);
    }
    return req.responseText;
}
Examples
Running
1 | file_get_contents('http://kevin.vanzonneveld.net/pj_test_supportfile_1.htm'); |
Should return
1 | '123' |
Dependencies
No dependencies, you can use this function standalone.
Open syntax issues
php.js uses JsLint to help us keep our code consistent and prevent some common bugs.
Eventually we want all code to pass or at least take into consideration most fixes suggested by JsLint, following this JsLint configuration we’ve decided on.
Authors
Thanks to the following developers, you get to have file_get_contents goodness in JavaScript.
The code above is not the same as the raw source, so users need to ensure they pull down the raw source.
Line 177 appears to be an "if" without a "true" clause because "req.sendAsBinary(content);" is commented out. Is this the way it is supposed to be?
Here is the error:
Error: uncaught exception: [Exception... "Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIXMLHttpRequest.send]" nsresult: "0x80004005 (NS_ERROR_FAILURE)" location: "JS frame :: file:///C:/jscript/test_last.html :: file_get_contents :: line 227" data: no]
and line 227 is accidentally the same as line 227 on this page
Firefox is the browser I usually use for testing, unless someone reports a bug elsewhere. What problem(s) were you having? Some sample code maybe? When in Firefox, what errors are you getting in Tools->Error Console ?
Hi, is this function tested in other than IE-based browsers? I'm not able to make it work in e.g. firefox, opera, google chrome. In IE it works fine.
thanks
@Jeffrey: I see in the XMLHttpRequest draft at the W3C that overrideMimeType() is not yet standardized on. It should work in at least Firefox, so you can confirm for yourself that it works there, but I'm not familiar with support elsewhere, so it looks like that won't be a robust solution for now.
Also, the scheme the spec mentions for handling character sets indeed does not inspect the <meta> tags to detect the character set (though HTML 5 still will! http://www.whatwg.org/specs/web-apps/current-work/#attr-meta-http-equiv-content-type ), so unless someone complains to the spec team and they accept a change, XMLHttpRequest() doesn't look like it will ever automatically handle meta tag encodings with responseText by the looks of things. (DOMParser() is being discussed as a standardization candidate, which could perhaps, if given a "text/html" argument, check for meta encodings, but that is not implemented or agreed upon.)
If you are talking about a regular website (and not privileged JS like in a browser extension), you need to be in control of the targeted website anyways (we don't have cross-domain support implemented in file_get_contents() yet, and it requires explicit server permissions anyways).
So, it looks like your options are:
1) get the site to either: A) upgrade to utf8 (best solution for everyone and most future proof solution) or, B) to at least send a header which indicates the current character set they are using. This can be easily done in the likes of PHP by adding a line at the top of your PHP script like this (should be before even any whitespace is sent to the browser, so make sure your opening <?php block is not preceded by whitespace or HTML, and if it is, add a PHP block with the following line before it):
"header('Content-Type: text/html;charset=gb2312');" (replacing "gb2312" with the character set being sent out by the website in its meta tag)
2) find a JS conversion algorithm of the site's character set into Unicode (be sure to share it here if you do!--unicode_encode() could use this: http://php.net/manual/en/function.unicode-encode.php --although we already have utf8_encode() implemented, that only works for Latin-1)). But as with overrideMimeType(), you'll need to know, guess, or extract the character set to make the conversion.
@Brett: Thanks for looking into this. I just tried inserting that line (req.overrideMimeType...) in the function and received the error "Object doesn't support this property or method."
@Jeffrey Brown: I believe the issue is probably due to Ajax having trouble detecting the correct MIME type. For example, in Firefox, when I use file_get_contents() on http://www.sina.com , a Chinese website in the gb2312 encoding, I also get question marks (though if you load it in the browser, it is ok). I don't have the same problem when loading a file which has the proper character encoding set in the server's response header: e.g., http://bahai-academic.hk/?langu=zh-CN
While I would have thought that setting a header like "Accept-Charset" would work (you can set headers through the stream context argument in file_get_contents() if need be), it didn't. It seems, at least in cases where the website doesn't set a content type header (when the browser can only get the encoding by parsing the text for meta tags, etc.) one must instead actually manually override the mime type (how the content is treated after it is returned from the server):
1 | req.overrideMimeType('text/html; charset=gb2312'); |
I'm not sure how to best implement this in file_get_contents(). I think we might solve this by adding our own custom option to the stream context which allowed overriding the mime type instead of just sending an (optional) request header. Maybe we can name the option "phpjs.overrideMimeType"?
@Caleb and Ben: Sorry we missed your question, Caleb, and thanks for answering Ben.
What Ben said is true, but as of Firefox 3.5 (and possibly other newer browsers, though no doubt not Explorer yet), it is possible for servers to indicate they are willing to accept certain Ajax from other sites, but they have to do so explicitly: https://developer.mozilla.org/En/HTTP_Access_Control
Another client-side alternative (though more challenging for a beginner) is to use the JavaScript inside of an extension or widget which has privileges, or use "signed" JavaScript in HTML for which permission is required from the user (though this may only work in certain browsers). In such a case, access can take place cross-site, but it is something a bit more involved and may not work in all browsers.
Actually, come to think of it, we might be able to ask for signed privileges from the user (for regular HTML JavaScript) in the function if the request doesn't work...But again, this will require permission from the user and not necessarily work in all browsers.
It's funny you all mention this now because I'm trying to add support for this function to work client-side in Firefox extensions by using the stream "context" argument in file_get_contents() to set up Mozilla-specific configuration, though this could also work for other HTML situations (e.g., controlling the headers and request type used for file_get_contents()). I can let you know how it goes if I can get it going, though there are a number of pieces to put together first before it can work.
Here's another dependent to file_get_contents()... I've tried to make it more useful for JavaScript by not eliminating newlines (since they could be used without semicolons). I'm guessing there could be aspects I've missed, especially if you're trying to be able to operate on PHP code.
// Return the source of a file with comments and redundant whitespace removed.
//
// Depends on file_get_contents().
//
// Quoted string literals are preserved verbatim, so a literal such as
// "http://example.com" is not mistaken for a // comment and its inner
// whitespace is left alone. Outside of literals: both comment styles are
// removed, runs of non-newline whitespace are reduced to one space, multiple
// newlines (preceded by any whitespace) are reduced to one newline, and
// whitespace at the beginning and end of each line is removed. Newlines are
// deliberately kept, since JavaScript may rely on them instead of semicolons.
//
// Returns '' when the file cannot be read.
//
// Sample input for testing, based on the PHP manual example:
//   "<?php\n// PHP comment here\n\n/*\n * Another PHP comment\n */\n\n" +
//   "echo php_strip_whitespace(__FILE__);\n" +
//   "// Newlines are considered whitespace, and are removed too:\ndo_nothing();?>"
// Sample call:
//   alert(php_strip_whitespace('http://kevin.vanzonneveld.net/code/php_equivalents/php.namespaced.js'));
function php_strip_whitespace (file) {
    var str;
    try {
        str = file_get_contents(file);
    } catch (e) {
        // Unreadable file: PHP's php_strip_whitespace() yields an empty string
        return '';
    }
    if (typeof str !== 'string') {
        // file_get_contents() returns false for unsupported stream contexts
        return '';
    }

    var literals = [];
    // Single left-to-right pass so that comment markers inside strings (and
    // quotes inside comments) are attributed to the token that starts first.
    // String literals are parked behind NUL-delimited placeholders, which the
    // whitespace rules below never touch.
    var code = str.replace(/"(?:\\[\s\S]|[^"\\])*"|'(?:\\[\s\S]|[^'\\])*'|\/\/[^\n]*|\/\*[\s\S]*?\*\//g, function (token) {
        var first = token.charAt(0);
        if (first === '"' || first === "'") {
            literals.push(token);
            return '\u0000' + (literals.length - 1) + '\u0000';
        }
        return '';
    });

    code = code.replace(/[ \f\r\t\v\u00A0\u2028\u2029]+/g, ' ')
        .replace(/\s*\n+/g, '\n')
        .replace(/^\s+/gm, '')
        .replace(/\s*$/gm, '');

    // Put the untouched string literals back
    return code.replace(/\u0000(\d+)\u0000/g, function (placeholder, index) {
        return literals[index];
    });
}
Here's a function that depends on file_get_contents(). Note that I did not implement its second argument (for an include path), since local files aren't supported anyways. This is a fun one to play around with actually.
function get_meta_tags (file) {
    // Extract the name/content pairs of all <meta> tags found in a document's head.
    //
    // Depends on file_get_contents(). The second PHP argument (include path)
    // is not implemented, since local files are not supported anyway.
    //
    // Only the text up to the first </head> is examined; a document without a
    // closing </head> yields an empty object. Keys are the meta names,
    // lower-cased, with every non-word character replaced by '_'
    // (e.g. "geo.position" becomes "geo_position"). Attributes may appear in
    // any order and may be double-quoted, single-quoted or unquoted.
    //
    // Sample head for testing:
    //   '<meta name="author" content="name">' +
    //   '<meta name="keywords" content="php documentation">' +
    //   '<meta name="DESCRIPTION" content="a php manual">' +
    //   '<meta name="geo.position" content="49.33;-86.59">' +
    //   '</head>'
    var hasOwn = Object.prototype.hasOwnProperty;
    var source = file_get_contents(file);
    if (typeof source !== 'string') {
        return {};
    }
    var head = source.match(/^[\s\S]*?<\/head>/i);
    if (!head) {
        return {};
    }

    var tagPattern = /<meta\b[^>]*>/gi;
    // attribute name, then a "double", 'single' or bare value
    var attrPattern = /([^\s=<>\/"']+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))/g;
    var arr = {}, tag, attr, attrs, key;

    while ((tag = tagPattern.exec(head[0])) !== null) {
        attrs = {};
        attrPattern.lastIndex = 0; // shared /g regex: restart for every tag
        while ((attr = attrPattern.exec(tag[0])) !== null) {
            key = attr[1].toLowerCase();
            if (!hasOwn.call(attrs, key)) { // first occurrence of an attribute wins
                attrs[key] = attr[2] !== undefined ? attr[2] : (attr[3] !== undefined ? attr[3] : attr[4]);
            }
        }
        if (hasOwn.call(attrs, 'name') && hasOwn.call(attrs, 'content')) {
            arr[attrs.name.replace(/\W/g, '_').toLowerCase()] = attrs.content;
        }
    }
    return arr;
}
With JS it is not possible to retrieve a local file using file() or file_get_contents(), so it may be useful to use this same method using HEAD instead of GET to have the equivalent to filesize or file_exists
function filesize( url ) {
    // Get the size in bytes of a remote file, using a synchronous HEAD request.
    //
    // Returns the Content-Length of the response as an integer, or false when
    // the server answers with an error status (400 and above) or does not
    // report a numeric Content-Length.
    // Throws an Error when no XMLHttpRequest implementation is available.
    var factories = [
        function () { return new ActiveXObject("Msxml2.XMLHTTP"); },
        function () { return new ActiveXObject("Microsoft.XMLHTTP"); },
        function () { return new XMLHttpRequest(); }
    ];
    var req = null, i;
    for (i = 0; i < factories.length && req === null; i++) {
        try {
            req = factories[i]();
        } catch (e) {
            // This implementation is not available here; try the next one
            req = null;
        }
    }
    if (req === null) {
        throw new Error('XMLHttpRequest not supported');
    }

    // HEAD returns the headers only, which is all that is needed
    req.open('HEAD', url, false);
    req.send(null);

    if (req.status >= 400) {
        // The Content-Length would describe the error page, not the file
        return false;
    }
    var length = req.getResponseHeader('Content-Length');
    if (length === null || !/^\s*\d+\s*$/.test(length)) {
        return false;
    }
    return parseInt(length, 10);
}
[Code = "Javascript"]
function file_exists( url ) {
    // Check whether a remote resource exists by issuing a synchronous HEAD
    // request (HEAD results are usually shorter, hence faster, than GET).
    // Returns true only when the response status equals 200, false otherwise.
    // Throws an Error when no XMLHttpRequest implementation can be created.
    var makers = [
        function () { return new ActiveXObject("Msxml2.XMLHTTP"); },
        function () { return new ActiveXObject("Microsoft.XMLHTTP"); },
        function () { return new XMLHttpRequest(); }
    ];
    var xhr = null;
    var idx = 0;
    // Try each implementation in turn until one can be instantiated
    while (xhr == null && idx < makers.length) {
        try {
            xhr = makers[idx]();
        } catch (ignored) {
            xhr = null;
        }
        idx += 1;
    }
    if (xhr == null) {
        throw new Error('XMLHttpRequest not supported');
    }
    xhr.open('HEAD', url, false);
    xhr.send(null);
    return xhr.status == 200;
}
[/Code]
I'm not quite sure if these functions fit correctly in php.js. Both functions won't work with http files in php, but as I said before, php works with local files and js does not, so working with remote http files may be somehow equivalent.
Also the file_exists function may have different answers. Status code 200 means something exists, 404 means it doesn't, but there are lots of codes that mean different things.
@ Adnan Siddiqi: That is correct. Browser will prevent that because it's considered a security risk.
@ Philippe Baumann: Good to see you back! Don't forget to checkout work in progress at: http://phpjs.org That site will have much better submit features! Added your functions btw, thanks!
Just found this project again and wanted to see how it's going. I'm very impressed how much this library has grown since.
I've found the following two functions in my development folder. Admittedly, they're not used very often and I also didn't really come up with the first one myself, but you might still find them a nice addition:
/*
string dechex ( int $number )

Returns a string containing a hexadecimal representation of the given number argument.
The largest number that can be converted is 4294967295 in decimal resulting to "ffffffff".

Numeric strings are accepted, fractional parts are discarded, and negative
numbers are converted as unsigned 32-bit values (-1 gives "ffffffff").
Input that is not numeric gives "0".
*/
function dechex(number) {
    // parseInt() first: calling toString(16) on a string would ignore the radix
    var value = parseInt(number, 10);
    if (isNaN(value)) {
        return '0';
    }
    if (value < 0) {
        value = value >>> 0; // reinterpret as unsigned 32-bit
    }
    return value.toString(16);
}

/*
number hexdec ( string $hex_string )

Returns the decimal equivalent of the hexadecimal number represented by the hex_string argument.
hexdec() converts a hexadecimal string to a decimal number.

hexdec() will ignore any non-hexadecimal characters it encounters.
A string without any hexadecimal digit gives 0.
*/
function hexdec(hex_string) {
    hex_string = (hex_string+'').replace(/[^a-f0-9]/gi, '');
    if (hex_string === '') {
        // parseInt('', 16) would be NaN
        return 0;
    }
    return parseInt(hex_string, 16);
}
By the way: Is there a better way to chat and submit functions than posting in the article for another function?


Brett Zamir
Feb 19th