/*
 * Chapter word-count utilities.
 *
 * Results of the most recent get_chapter_counts() call are published through
 * the global variables below.  They are kept as `var` (not let/const) so they
 * stay reachable as properties of the global object for any other script
 * that reads them.
 */

// array of chapter heading lines, exactly as matched in the text
var chapterTitlesArray = [];
// array of integer word counts, one per chapter, parallel to chapterTitlesArray
var chapterCountsArray = [];
// heading and word count of the shortest chapter
var shortestChapterName = "";
var shortestChapterCount = 0;
// heading and word count of the longest chapter
var longestChapterName = "";
var longestChapterCount = 0;

/**
 * Return a fresh regular expression matching a chapter heading line for the
 * given heading style, or null if the style is not recognised.
 *
 * All patterns are global, case-insensitive and multi-line, so they can be
 * used with both String.prototype.match and String.prototype.split.
 *
 * NOTE(review): the Roman-numeral class is [ivxclm] - it has no 'd' (500) and
 * accepts any run of those letters (e.g. a line reading "mix"); left
 * unchanged so that existing texts keep dividing the same way.
 *
 * @param {string} chapterDivType - one of "num", "num_dot", "ch_num", "rom",
 *     "rom_dot", "ch_rom"
 * @returns {RegExp|null}
 */
function get_chapter_heading_regex(chapterDivType) {
    switch (chapterDivType) {
    // number on its own, allowing spaces & tabs before and after on the line
    case "num":
        return /^[\t ]*\d+[\t ]*$/gim;
    // number followed by a full stop, allowing spaces & tabs around it
    case "num_dot":
        return /^[\t ]*\d+\.[\t ]*$/gim;
    // "Chapter" + number followed by any text (the chapter title),
    // allowing spaces & tabs before the word "chapter"
    case "ch_num":
        return /^[\t ]*chapter \d+.*$/gim;
    // Roman numeral on its own, allowing spaces & tabs around it
    case "rom":
        return /^[\t ]*[ivxclm]+[\t ]*$/gim;
    // Roman numeral followed by a full stop, allowing spaces & tabs around it
    case "rom_dot":
        return /^[\t ]*[ivxclm]+\.[\t ]*$/gim;
    // "Chapter" + Roman numeral followed by any text (the chapter title),
    // allowing spaces & tabs before the word "chapter"
    case "ch_rom":
        return /^[\t ]*chapter [ivxclm]+.*$/gim;
    default:
        return null;
    }
}

/**
 * Divide a text into chapters and count the words in each chapter.
 *
 * Results are placed in the global variables chapterTitlesArray,
 * chapterCountsArray, shortestChapterName, shortestChapterCount,
 * longestChapterName and longestChapterCount.  They are always reset first,
 * so stale values from a previous call never leak through.
 *
 * Any text before the first chapter heading is not counted.
 *
 * When chapterDivType is recognised but no heading line is found,
 * chapterTitlesArray is left as null (the raw String.prototype.match result),
 * as it always has been; callers should check for that.
 *
 * @param {string} txtStr - the text to be divided into chapters and counted
 * @param {string} chapterDivType - how chapters are headed:
 *     "num"     - a line holding a decimal number on its own
 *     "num_dot" - same as "num", but followed by a full stop
 *     "ch_num"  - a line with the word "Chapter" followed by a number
 *     "rom"     - a line holding a Roman numeral on its own
 *     "rom_dot" - same as "rom", but followed by a full stop
 *     "ch_rom"  - a line with the word "Chapter" followed by a Roman numeral
 *     With "ch_num" and "ch_rom", anything following the chapter number is
 *     taken to be the chapter title and is kept as part of the heading.
 * @returns {number|undefined} 0 when txtStr is empty, otherwise undefined
 */
function get_chapter_counts(txtStr, chapterDivType) {
    var headingRegex;
    var chapterTextArray;
    var chapterCount;
    var chapterName;
    var chapterWordCount;
    var i;

    reset_counts();

    // don't try and process an empty txtStr
    if (!txtStr) {
        return 0;
    }

    headingRegex = get_chapter_heading_regex(chapterDivType);
    if (headingRegex === null) {
        // unknown heading style: leave the freshly reset (empty) results
        return undefined;
    }

    // divide txtStr into chapters according to the style of chapter heading
    chapterTitlesArray = txtStr.match(headingRegex);
    if (chapterTitlesArray === null) {
        return undefined;
    }
    chapterTextArray = txtStr.split(headingRegex);

    // go through the chapter texts, getting word counts
    // NOTE(review): this loop body was reconstructed from the surrounding
    // logic and the documented outputs - confirm against an intact copy.
    chapterCount = chapterTitlesArray.length;
    for (i = 0; i < chapterCount; i++) {
        chapterName = chapterTitlesArray[i];
        // split() puts the text before the first heading at index 0, so the
        // body belonging to heading i is at index i + 1
        chapterWordCount = get_word_count(chapterTextArray[i + 1]);
        chapterCountsArray[i] = chapterWordCount;

        if (chapterWordCount > longestChapterCount) {
            longestChapterCount = chapterWordCount;
            longestChapterName = chapterName;
        }
        // the first chapter always seeds the "shortest" result, because the
        // reset value of 0 could never be beaten otherwise
        if (i === 0 || chapterWordCount < shortestChapterCount) {
            shortestChapterCount = chapterWordCount;
            shortestChapterName = chapterName;
        }
    }

    return undefined;
}

/**
 * Return the word count of the text in txtStr.
 *
 * A word is a run of ASCII letters/digits, optionally followed by an
 * apostrophe suffix ("dog's", "it's").  A number written with comma
 * thousands separators ("1,000") counts as one word; any other comma is
 * treated as punctuation, so a stray "," is not counted and "a,b" is two
 * words.
 *
 * @param {string} txtStr - the text to count; empty, null or undefined gives 0
 * @returns {number} the number of words found
 */
function get_word_count(txtStr) {
    var wordMatches;

    if (!txtStr) {
        return 0;
    }
    wordMatches = String(txtStr).match(
        /(?:\d+(?:,\d{3})+(?!\d)|[a-z0-9]+)(?:'[a-z]+)?/gi
    );
    return wordMatches === null ? 0 : wordMatches.length;
}

/**
 * Reset the global result variables to their initial, empty state.
 */
function reset_counts() {
    chapterTitlesArray = [];
    chapterCountsArray = [];
    shortestChapterName = "";
    shortestChapterCount = 0;
    longestChapterName = "";
    longestChapterCount = 0;
}