• 主页
  • 标签
  • 归档
  • 搜索
  • Github

March 02, 2021

Read n lines of a big text file

本文为转载文章, 仅用于自己的知识管理收集, 如果涉及侵权,请联系 suziwen1@gmail.com,会第一时间删除
收集该文章,并非代表本人支持文中观点,只是觉得文章内容容易引起思考,讨论,有它自有的价值

转载自: https://stackoverflow.com/questions/39479090/read-n-lines-of-a-big-text-file

The logic is very similar to what I wrote in my answer to "filereader api on big files", except that you need to keep track of the number of lines you have processed so far (and also of the last line read so far, because it may not have ended yet). The example below decodes the file as UTF-8 (which also covers plain ASCII); if you need another encoding, look at the options for the TextDecoder constructor.

If you are certain that the input is ASCII (or any other single-byte encoding), then you can also skip the use of TextDecoder and directly read the input as text using the FileReader's readAsText method.

  // Example usage of readSomeLines (defined below): when the "start" button
  // is clicked, print up to the requested number of lines of the selected
  // file to the console.
  document.getElementById('start').onclick = function() {
    var file = document.getElementById('infile').files[0];
    if (!file) {
      console.log('No file selected.');
      return;
    }
    var maxlines = parseInt(document.getElementById('maxlines').value, 10);
    // parseInt yields NaN for an empty or non-numeric field; a negative
    // count makes no sense either, so bail out early with a message.
    if (isNaN(maxlines) || maxlines < 0) {
      console.log('Please enter a non-negative number of lines.');
      return;
    }
    var lineno = 1;
    readSomeLines(file, maxlines, function(line) {
      // Separate the line number from the line's text.
      console.log('Line ' + (lineno++) + ': ' + line);
    }, function onComplete(error) {
      // readSomeLines passes the FileReader error here when reading fails;
      // do not report success in that case.
      if (error) {
        console.error('Failed to read file:', error);
        return;
      }
      console.log('Read all lines');
    });
  };

  17. 17 

  /**
   * Read up to and including |maxlines| lines from |file|.
   *
   * The file is read in fixed-size chunks and decoded as UTF-8, so only a
   * small part of a big file is held in memory at any time. Lines are split
   * on '\n' only; a '\r' before it (Windows line endings) is left in the line.
   *
   * @param {Blob} file - The file to be read.
   * @param {integer} maxlines - The maximum number of lines to read. Values
   *     less than 1 read nothing; NaN or Infinity means "no limit"; fractional
   *     values are rounded down.
   * @param {function(string)} forEachLine - Called for each line. Every line
   *     that was terminated in the file is passed with its trailing '\n'; a
   *     final unterminated line is passed without one.
   * @param {function(error)} onComplete - Called when the end of the file
   *     is reached or when |maxlines| lines have been read (without argument),
   *     or with the FileReader's error when reading fails.
   */
  function readSomeLines(file, maxlines, forEachLine, onComplete) {
    var CHUNK_SIZE = 50000; // 50 kB, arbitrarily chosen.
    // Normalise the limit once so the array-length arithmetic below can never
    // see a negative or fractional value (either would throw a RangeError).
    var limit = Math.floor(Number(maxlines));
    if (isNaN(limit)) {
      limit = Infinity;
    }
    var decoder = new TextDecoder();
    var offset = 0;
    var linecount = 0;
    var pending = ''; // Text after the last '\n' seen so far.
    var fr = new FileReader();

    fr.onload = function() {
      // Use stream:true in case we cut the file
      // in the middle of a multi-byte character.
      pending += decoder.decode(fr.result, {stream: true});
      var lines = pending.split('\n');
      pending = lines.pop(); // In case the line did not end yet.

      if (linecount + lines.length > limit) {
        // Read too many lines? Truncate the results.
        lines.length = limit - linecount;
      }
      linecount += lines.length;

      for (var i = 0; i < lines.length; ++i) {
        forEachLine(lines[i] + '\n');
      }
      offset += CHUNK_SIZE;
      seek();
    };
    fr.onerror = function() {
      onComplete(fr.error);
    };
    seek();

    // Read the next chunk, or finish when enough lines were found or the
    // end of the file was reached.
    function seek() {
      if (linecount >= limit) {
        // We found enough lines.
        onComplete(); // Done.
        return;
      }
      if (offset >= file.size) {
        // We did not find all lines, but there are no more lines.
        // Flush the decoder so an incomplete trailing byte sequence is
        // reported (as U+FFFD) instead of being dropped silently.
        pending += decoder.decode();
        if (pending !== '') {
          // Only a non-empty remainder is a real (unterminated) last line;
          // a file ending in '\n' or an empty file has none.
          forEachLine(pending);
        }
        onComplete(); // Done.
        return;
      }
      var slice = file.slice(offset, offset + CHUNK_SIZE);
      fr.readAsArrayBuffer(slice);
    }
  }

  <!-- Demo form for the script above: the script looks these controls up by
       their ids (maxlines, infile, start), so the ids must stay unchanged. -->
  Read <input type="number" id="maxlines" min="0" step="1" aria-label="Number of lines to read"> lines from
  <input type="file" id="infile" aria-label="File to read">.
  <input type="button" id="start" value="Print lines to console">

Tagged with 文章 | 转载 | 技术
Time Flies, No Time for Nuts
Copyright © 2020 suziwen
Build with  Gatsbyjs  and  Sculpting theme