本文为转载文章, 仅用于自己的知识管理收集, 如果涉及侵权,请联系 suziwen1@gmail.com,会第一时间删除
收集该文章,并非代表本人支持文中观点,只是觉得文章内容容易引起思考,讨论,有它自有的价值。 转载自: https://stackoverflow.com/questions/39479090/read-n-lines-of-a-big-text-file
The logic is very similar to what I wrote in my answer to "filereader api on big files", except you need to keep track of the number of lines that you have processed so far (and also the last line read so far, because it may not have ended yet). The next example works for any encoding that is compatible with UTF-8; if you need another encoding, look at the options for the `TextDecoder` constructor.
If you are certain that the input is ASCII (or any other single-byte encoding), then you can also skip the use of `TextDecoder` and directly read the input as text using the `FileReader`'s `readAsText` method.
// This is just an example of the function below.
// Clicking the "start" button reads up to the requested number of lines from
// the selected file and logs each one, numbered, to the console.
document.getElementById('start').onclick = function() {
  var file = document.getElementById('infile').files[0];
  if (!file) {
    console.log('No file selected.');
    return;
  }
  var maxlines = parseInt(document.getElementById('maxlines').value, 10);
  var lineno = 1;
  // readSomeLines is defined below.
  readSomeLines(file, maxlines, function(line) {
    // Keep a separator between the counter and the text so that the output
    // reads "Line 1: foo" rather than running the number into the line.
    console.log('Line ' + (lineno++) + ': ' + line);
  }, function onComplete(error) {
    // readSomeLines passes the FileReader error here when a read fails;
    // report it instead of claiming success.
    if (error) {
      console.error('Failed to read file:', error);
      return;
    }
    console.log('Read all lines');
  });
};

/**
 * Read up to and including |maxlines| lines from |file|.
 *
 * The file is read in chunks of CHUNK_SIZE bytes and decoded with a streaming
 * TextDecoder (UTF-8 by default), so a multi-byte character that straddles a
 * chunk boundary is decoded correctly. Lines are split on '\n' only; a '\r'
 * that precedes it is left in the line.
 *
 * @param {Blob} file - The file to be read.
 * @param {integer} maxlines - The maximum number of lines to read. Fractional
 *    values are rounded down and negative values are treated as 0. A value
 *    that is not a number (e.g. NaN or undefined) means "no limit".
 * @param {function(string)} forEachLine - Called for each line. A line that
 *    was terminated by '\n' is passed with the '\n' appended; a final line
 *    without a terminator is passed as-is. No call is made for the empty
 *    remainder after a trailing '\n'.
 * @param {function(error)} onComplete - Called without arguments when the end
 *    of the file is reached or when |maxlines| lines have been read. Called
 *    with the FileReader's error if a read fails.
 */
function readSomeLines(file, maxlines, forEachLine, onComplete) {
  var CHUNK_SIZE = 50000; // 50kb, arbitrarily chosen.

  // Normalize the limit once so the arithmetic below never produces a
  // negative or fractional array length.
  var limit = Math.floor(Number(maxlines));
  if (isNaN(limit)) {
    limit = Infinity; // Non-numeric limit: read the whole file.
  } else if (limit < 0) {
    limit = 0;
  }

  var decoder = new TextDecoder();
  var offset = 0;
  var linecount = 0;
  var results = ''; // Text of the last line seen so far, not yet terminated.
  var fr = new FileReader();

  fr.onload = function() {
    // Use stream:true in case we cut the file
    // in the middle of a multi-byte character
    results += decoder.decode(fr.result, {stream: true});
    var lines = results.split('\n');
    results = lines.pop(); // In case the line did not end yet.

    if (linecount + lines.length > limit) {
      // Read too many lines? Truncate the results.
      lines.length = limit - linecount;
    }
    linecount += lines.length;

    for (var i = 0; i < lines.length; ++i) {
      forEachLine(lines[i] + '\n');
    }
    offset += CHUNK_SIZE;
    seek();
  };
  fr.onerror = function() {
    onComplete(fr.error);
  };
  seek();

  // Either finishes (limit reached / end of file) or starts reading the
  // next chunk.
  function seek() {
    if (linecount >= limit) {
      // We found enough lines.
      onComplete(); // Done.
      return;
    }
    if (offset !== 0 && offset >= file.size) {
      // We did not find all lines, but there are no more lines.
      // Flush the decoder so bytes of an incomplete trailing character are
      // not silently dropped.
      results += decoder.decode();
      if (results !== '') {
        // Unterminated last line (from lines.pop(), before). Skipped when
        // empty so a file ending in '\n' does not yield a phantom line.
        forEachLine(results);
      }
      onComplete(); // Done
      return;
    }
    var slice = file.slice(offset, offset + CHUNK_SIZE);
    fr.readAsArrayBuffer(slice);
  }
}
<!-- Demo controls used by the example script: line count, file picker, start button. -->
Read <input type="number" id="maxlines" min="0" step="1"> lines from
<input type="file" id="infile">.
<input type="button" id="start" value="Print lines to console">