import axios from "axios";
import { parseTar, parseTarGzip } from "nanotar";
import { resolveService } from "../../utilities/io";

// Timing-tag pattern: group 1 is an optional single non-digit character followed by a run of digits;
// then an optional "-" and an optional second run of digits (group 2), as in a "3-5" style tag.
const tagRegex = /([^0-9]?[0-9]+)-?([0-9]+)?/;
// Matches a string made up ONLY of spaces and the listed punctuation characters (at least one character).
const interwordRegex = /^[ .,;:“”‘’"'!?()—]+$/;
// Matches a "compound" word: optional ASCII letters (group 1), a run of characters that are neither
// ASCII letters nor spaces (group 2), and optional trailing ASCII letters (group 3).
const complexWordRegex = /^([a-zA-Z]+)?([^a-zA-Z ]+)([a-zA-Z]+)?$/;
/**
 * Downloads the tar (or gzipped tar) archive named by `audioInfo`, splits its
 * files into phrase files and timing files, and hands them to addAudioTags.
 *
 * Files whose name contains "aeneas" or "phrases" (not at position 0) are
 * treated as phrase files; every other file is treated as a timing file and
 * passed through removeTimingHeaders. Files are processed in name order.
 *
 * @param {string} html - The html text to annotate.
 * @param {string} audioInfo - Archive reference passed to resolveService; a
 *   name ending in ".tar" is parsed as plain tar, anything else as tar+gzip.
 * @returns {Promise<Array|null|undefined>} The [html, timings] pair produced by
 *   addAudioTags; null when the archive yields no phrase files or no timing
 *   files; undefined when the download/parse fails or the archive holds fewer
 *   than two files.
 */
async function annotateAudio(html, audioInfo) {
	console.log("annotateAudio " + audioInfo);
	const phrases = [];
	const timings = [];
	try {
		const url = resolveService(audioInfo);
		console.log("audioInfo " + url);
		const response = await axios.get(url, {
			responseType: "arraybuffer"
		});
		console.log("response.status " + response.status);
		const data = response.data;
		console.log("data.length (.gz) " + data.byteLength);
		console.log("starting html length " + html.length);
		const files = audioInfo.endsWith(".tar")
			? parseTar(data)
			: await parseTarGzip(data);
		console.log("files.length " + files.length);
		if (files.length < 2) {
			console.error("failed to get " + audioInfo);
			return;
		}
		// A comparator must return 0 for equal names to be well defined.
		files.sort((a, b) => {
			if (a.name < b.name) return -1;
			return a.name > b.name ? 1 : 0;
		});
		for (const file of files) {
			// file has .type, .name, .size, .data=>uint8array, .text=>String, .attrs
			const contents = file.text.split("\n"); // might leave blank line at end
			if (file.name.indexOf("aeneas") > 0 || file.name.indexOf("phrases") > 0)
				phrases.push(contents);
			else timings.push(removeTimingHeaders(contents));
		}
	} catch (error) {
		// Include the error itself; the bare message alone hides the actual cause.
		console.error("Error fetching audio phrases", error);
		return;
	}
	if (phrases.length === 0 || timings.length === 0) return null;
	return addAudioTags(html, phrases, timings);
}
/**
 * Wraps each phrase found in `html` in a `<span id="chap-tag">` element.
 *
 * Phrases are processed chapter by chapter, in order; findWords locates each
 * phrase at or after the current position and the html between phrases is
 * copied through unchanged. When a tag repeats within a chapter, a suffix
 * letter is appended to it and the matching line of `timings` is rewritten in
 * place so both stay in step.
 *
 * @param {string} html - The html text to annotate.
 * @param {string[][]} allphrases - One array of phrase lines per chapter; each
 *   line is "tag<sep>text" where <sep> is "|" or a tab (decided from the first
 *   line of the first chapter).
 * @param {string[][]} timings - One array of tab-separated timing lines per
 *   chapter (third field is the tag). Mutated when duplicate tags are fixed.
 * @returns {Array} [annotatedHtml, timings] (the same timings array passed in).
 */
function addAudioTags(html, allphrases, timings) {
	let index = 0;
	let result = "";
	const separator = allphrases[0][0].indexOf("|") > 0 ? "|" : "\t"; // 1st chapter, 1st phrase
	const tagFixes = ["z", "y", "x", "w"];
	// audio is per chapter, and phrases is per chapter
	for (let chap = 1; chap <= allphrases.length; chap += 1) {
		const tags = []; // keep track of used tags to correct for duplicates from aeneas
		// Copy per chapter: popping from the shared list would use the suffixes up
		// across chapters and eventually append "undefined" to a tag.
		const fixes = tagFixes.slice();
		const chapStart = index;
		const prevlen = result.length;
		const phrases = allphrases[chap - 1];
		const chapTimings = timings[chap - 1];
		let carryforward = null;
		for (let iphrase = 0; iphrase < phrases.length; iphrase += 1) {
			let phrase = phrases[iphrase];
			phrase = phrase.replace(/[&(]/g, ""); // remove & (footnote stand in) from phrases '(\add word' is also a problem
			phrase = phrase.replace("  ", " "); // eliminate troublesome double space coming from SAB (first occurrence only)
			if (!phrase || phrase.length < 2) continue; // empty last phrase
			const parts = phrase.split(separator);
			let tag = parts[0];
			if (parts.length < 2 || parts[1].length <= 1) continue; // nothing to find
			if (tags.includes(tag)) {
				console.log("duplicate tag " + tag + " in chapter " + chap);
				const timingLine = chapTimings ? chapTimings[iphrase] : undefined;
				const tparts =
					typeof timingLine === "string" ? timingLine.split("\t") : [];
				if (tparts[2] === tag && fixes.length > 0) {
					tag += fixes.pop(); // fix duplicate tag
					tparts[2] = tag;
					chapTimings[iphrase] = tparts.join("\t"); // fix in timings
				} else console.warn("unable to fix duplicate tag");
			}
			tags.push(tag);
			const words = parts[1].split(" ");

			const [start, next, unclosedleft, unclosedright] = findWords(
				html,
				index,
				words,
				carryforward
			);
			if (start === null) {
				console.warn("findWords returned null at " + chap + "-" + tag);
				// Skip the phrase: html.substring(index, null) would swap its arguments
				// and re-append everything before index. The skipped text is copied
				// through as uncaptured html ahead of the next phrase that is found.
				continue;
			}
			if (start < index - 24 || start - index > 900) {
				// longest footnote?
				console.log(
					// can be over 800 for long footnotes coinciding with long verse span
					"bad find words (long gap to start), nchars " +
						(index - start) +
						" before phrase: " +
						phrase
				);
				console.log("start " + start + " next " + next);
				console.log(html.substring(start, next));
			}
			if (start - index < 1000) {
				result += html.substring(index, start); // uncaptured formatting
				const toEmbed = html.substring(start, next);
				const idspan = '<span id="' + chap + "-" + tag + '">';
				// findWords can carry formatting forward again via unclosedright
				carryforward = null;
				// first deal with the 'left' side
				// NOTE(review): the original tested `unclosedleft && !carryforward` right
				// after clearing carryforward, so the extra close is always emitted when
				// unclosedleft is set. Behaviour kept; confirm whether it should be
				// skipped when the formatting was carried forward from the last phrase.
				if (unclosedleft) {
					result +=
						"</span>" + // close previous phrase formatting
						idspan + // open marked phrase
						unclosedleft + // reopens formatting span
						toEmbed + // formatting is closed within phrase
						"</span>"; // close one of marked phrase or formatting
				} else result += idspan + toEmbed + "</span>";
				// now deal with right side if needed
				if (unclosedright) {
					result += "</span>"; // add an extra close due to the open formatting span
					carryforward = unclosedright;
				}
			} else {
				console.log(
					"ERROR! segment too long, aborting at " +
						chap +
						"-" +
						tag +
						"; " +
						phrase
				);
				// NOTE(review): this also drops the rest of html from the result — confirm intended.
				index = html.length;
				break;
			}
			if (next > index) index = next;
			else console.log(result.substring(result.length - 200));
		}
		console.log(
			"chap " +
				chap +
				": " +
				(index - chapStart) +
				" => " +
				(result.length - prevlen)
		); // diagnostic to detect where something went wrong
		if (index >= html.length) break;
	}
	if (index < html.length) result += html.substring(index); // the rest of the file, if any
	return [result, timings];
}
/**
 * Strips leading header lines (those starting with a backslash) and one
 * trailing empty line from the lines of a timing file.
 *
 * @param {string[]} timings - Lines of one timing file.
 * @returns {string[]} The lines from the first non-header line onward, without
 *   a trailing empty line; an empty array when the input is empty or contains
 *   only header lines.
 */
function removeTimingHeaders(timings) {
	if (timings.length === 0) return [];
	const lastIsBlank = timings[timings.length - 1].length === 0;
	const end = lastIsBlank ? timings.length - 1 : timings.length;
	const first = timings.findIndex((line) => !line.startsWith("\\"));
	// Always return an array so callers never store undefined for a chapter.
	return first < 0 ? [] : timings.slice(first, end);
}

/**
 * Locates a phrase, given as a list of words, in `html` at or after `index`,
 * stepping over html elements that sit between (or before) the words.
 *
 * Spans whose class starts with one of the `special` prefixes are skipped
 * together with their content. A word that cannot be found within 800
 * characters of `index` is re-tried as a "compound" word (letters, punctuation,
 * letters) whose pieces may be separated by markup in the html.
 *
 * @param {string} html - Text to search.
 * @param {number} index - Offset at which the search starts.
 * @param {string[]} words - The words of the phrase, in order.
 * @param {string|null} carryforward - Opening span tag left unclosed by the
 *   previous phrase, or null.
 * @returns {Array} [istart, next, unclosedleft, unclosedright]: start offset of
 *   the phrase, offset just past it, an opening span tag to reopen at the start
 *   of the phrase (or the carryforward value), and an opening span tag left
 *   unclosed at the end of the phrase (or null). All four are null when the
 *   phrase cannot be located.
 */
function findWords(html, index, words, carryforward) {
	const special = ["sfm-ver", "sfm-c'>", "sfm-foo", "sfm-fig", "sfm-too"]; // clipped to 7 chars
	let next = index;
	let istart;
	let iword;
	let found = 0;
	let unclosedleft = carryforward;
	let unclosedright = null;
	for (let i = 0; i < words.length; i += 1) {
		let word = words[i]; // protect against span between word and parens left by aeneas
		let limit = 0;
		while (next < html.length && limit < 10) {
			limit += 1;
			const iangle = html.indexOf("<", next);
			iword = html.indexOf(word, next);
			if (iword < 0 || iword > index + 800) {
				console.log(word + " not found nearby, analyzing...");
				// most likely word is one of those rare cases like xyz\add*-qrt or xyz\add*) or other punctuation
				const match = word.match(complexWordRegex);
				console.log(match);
				if (!match) {
					console.error("word is not compound, giving up at " + next);
					console.log(html.substring(next, next + 80));
					return [null, null, null, null];
				}
				// Only the letter groups that actually matched can be searched for;
				// passing an undefined piece into the recursive call would throw.
				const parts = [match[1], match[3]].filter(Boolean);
				if (parts.length > 0) {
					const [istart2, next2] = findWords(html, next, parts, null);
					if (istart2 !== null) {
						const confirm = html.substring(istart2, next2);
						console.log("found " + confirm);
						let checks;
						if (parts.length === 1) checks = confirm.indexOf(parts[0]) >= 0;
						else
							checks =
								confirm.indexOf(parts[0]) >= 0 &&
								confirm.indexOf(match[2]) > 0 && // the punctuation piece
								confirm.indexOf(parts[1]) > 0;
						if (checks) {
							console.log(confirm + " passes checks");
							word = confirm; // change the word to include embedded formating & punctuation (corrects length)
							iword = istart2;
						}
					}
				}
				if (iword < 0) {
					// Continuing with -1 would move `next` backwards and report a bogus match.
					console.error("unable to locate " + word + ", giving up at " + next);
					return [null, null, null, null];
				}
			}
			const itest = iword < iangle || iangle < 0 ? iword : iangle; // get first to occur
			const safeGap =
				i === 0 ||
				itest <= next + 2 || // space plus one punctuation
				html.substring(next, itest).match(interwordRegex); // lots of punctuation ahead of found
			// With no '<' left in html (iangle === -1) the word necessarily comes first.
			const wordFirst = iangle < 0 || iword <= iangle;
			if (safeGap && wordFirst) {
				next = iword + word.length;
				found += 1;
				break; // word was found
			} else if (!safeGap) {
				console.log("word mismatch for " + word + " from " + words.join(" "));
				console.log(html.substring(next, itest + 8)); // sanity check
				console.log("restarting from first word " + words[0]);
				next = istart + words[0].length; // search after previous find
				i = -1; // reset phrase
				found = 0;
				break;
			}
			// < comes first, so skip past non word
			let iclose = html.indexOf(">", iangle + 1); // '<' before word, skip over element
			const element = html.substring(iangle, iangle + 20);
			if (element.startsWith("<span class=")) {
				// 12 chars of '<span class=' plus the opening quote, then 7 chars of class name
				if (special.includes(element.substring(13))) {
					// spans with content to skip entirely
					let spanclose = html.indexOf("</span>", iangle + 12); // first </span> after special
					const iangle2 = html.indexOf("<span", iangle + 12); // check for span in span
					if (iangle2 > 0 && iangle2 < spanclose) {
						spanclose = html.indexOf("</span>", spanclose + 7); // second </span>
						const iangle3 = html.indexOf("<span", iangle2 + 5); // check for span in span in span
						if (iangle3 > 0 && iangle3 < spanclose)
							spanclose = html.indexOf("</span>", spanclose + 7); // move to 3rd close
					}
					iclose = spanclose + 6; // pointing to final '>' of final '</span>
				}
			}
			if (iclose > next) {
				next = iclose + 1; // advance past one html element <...>
			} else {
				console.log("Error: failed to find element close > after " + element);
				return [null, null, null, null];
			}
		}
		if (next > index + 800) {
			console.log("probaby lost!");
			console.log(words);
			console.log(html.substring(index, index + 300));
		}
		if (i === 0) istart = iword;
	}
	const test = html.substring(istart, next);
	let spanopen = test.indexOf("<span"); // offsets within the phrase, not within html
	let spanclose = test.indexOf("</span");
	// first check: make sure opens and close at start of phrase are correct
	if (spanclose > 0 && spanclose < spanopen && !carryforward) {
		const spanopenleft = html.lastIndexOf("<span", istart - 6);
		if (spanopenleft >= 0) {
			const iclose = html.indexOf(">", spanopenleft);
			if (istart - iclose < 5) {
				console.log("istart moved from " + istart + " to " + spanopenleft);
				istart = spanopenleft;
			} else {
				// spanopenleft (an html offset) is the span to report and reopen; the
				// phrase-relative spanopen would slice an unrelated part of html.
				console.log(
					"distant unclosed <span detected before words: " + // usually right before previous phrase
						html.substring(spanopenleft, istart)
				);
				console.log("  while processing " + words.join(" "));
				console.log(" flag for auto close, auto reopen");
				unclosedleft = html.substring(spanopenleft, iclose + 1); // => caller can close and reopen
			}
		}
	}
	spanopen = test.lastIndexOf("<span");
	spanclose = test.lastIndexOf("</span");
	if (spanclose >= 0 && spanopen > spanclose) {
		spanopen += istart; // convert to offset in html
		spanclose = html.indexOf("</span>", next); // look for an immediately following </span>
		const safeGap = html.substring(next, spanclose).match(interwordRegex);
		if (safeGap) {
			next = spanclose + 7; // closed right after this phrase
		} else {
			const iclose = html.indexOf(">", spanopen);
			unclosedright = html.substring(spanopen, iclose + 1);
		}
	} else if (spanclose < 0 && spanopen < 0 && carryforward) {
		unclosedright = carryforward; // not yet closed
	}

	if (found !== words.length) {
		console.log("found only " + found + " words for " + words.join(" "));
		console.log("" + istart + " " + next);
	}
	return [istart, next, unclosedleft, unclosedright];
}

/**
 * Finds the timing tag that covers a "chapter:verse" reference.
 *
 * "1:1" maps to the very first timing entry when there is one. Otherwise the
 * chapter's timing lines (tab-separated: start, end, tag) are scanned for a tag
 * that either names the verse exactly or is a "first-last" range containing it.
 *
 * @param {string} v - Reference in "chapter:verse" form.
 * @param {string[][]} timings - One array of timing lines per chapter.
 * @returns {string|null} "chapter-tag", or null when nothing matches (including
 *   an unknown chapter).
 */
function verseToTimingTag(v, timings) {
	if (v === "1:1" && timings[0] && typeof timings[0][0] === "string") {
		const entry = timings[0][0]; // special case, go to first timing entry
		return "1-" + entry.split("\t")[2];
	}
	const [chap, vers] = v.split(":");
	const iv = parseInt(vers, 10);
	console.log(v);

	const chapTimings = timings[parseInt(chap, 10) - 1]; // one file [lines] per chapter
	if (Array.isArray(chapTimings)) {
		for (const timing of chapTimings) {
			if (typeof timing !== "string") continue;
			const tag = timing.split("\t")[2]; // start, end, tag
			// Blank or malformed lines have no tag field; skip them instead of throwing.
			const match = tag ? tag.match(tagRegex) : null;
			if (!match) continue;
			if (match[2]) {
				if (parseInt(match[1], 10) > iv || iv > parseInt(match[2], 10)) continue;
			} else if (match[1] !== vers) continue;
			return chap + "-" + tag;
		}
	}
	console.log("verseToTimingTag failed to find " + v);
	return null;
}

/**
 * Looks up the start time of a "chapter-tag" timing tag.
 *
 * @param {string} tag - Tag in "chapter-tag" form; everything after the first
 *   "-" is compared with the third field of each timing line.
 * @param {string[][]} timings - One array of tab-separated timing lines
 *   (start, end, tag) per chapter.
 * @returns {number|null} The start field parsed as a float, or null when the
 *   chapter or the tag is not found.
 */
function timingTagToTime(tag, timings) {
	const indx = tag.indexOf("-");
	const chap = tag.substring(0, indx);
	const vers = tag.substring(indx + 1);
	const chapTimings = timings[parseInt(chap, 10) - 1];
	// An unknown or unparsable chapter yields undefined here; report failure instead of throwing.
	if (Array.isArray(chapTimings)) {
		for (const timing of chapTimings) {
			if (typeof timing !== "string") continue;
			const parts = timing.split("\t");
			if (parts[2] === vers) return parseFloat(parts[0]);
		}
	}
	console.log("timingTagToTime failed for " + tag);
	return null;
}

/**
 * Finds the timing tag whose entry ends after `time`.
 *
 * Chapters are scanned in order; a chapter is skipped when `time` is past the
 * end field of its last timing line. Within a chapter the first line whose end
 * field is greater than `time` wins.
 *
 * @param {number} time - Time to look up.
 * @param {string[][]} timings - One array of tab-separated timing lines
 *   (start, end, tag) per chapter.
 * @returns {string|null} "chapter-tag" (chapter is 1-based), or null when no
 *   entry ends after `time`.
 */
function timeToTimingTag(time, timings) {
	for (let ic = 0; ic < timings.length; ic += 1) {
		const ctimings = timings[ic];
		// A chapter without timing lines has no last entry to read; skip it.
		if (!Array.isArray(ctimings) || ctimings.length === 0) continue;
		const chapEnd = parseFloat(ctimings[ctimings.length - 1].split("\t")[1]);
		if (time > chapEnd) continue;
		for (const timing of ctimings) {
			const parts = timing.split("\t");
			if (time < parseFloat(parts[1])) return ic + 1 + "-" + parts[2];
		}
	}
	return null;
}

/**
 * Reports the next timing entry that starts at or after `time`.
 *
 * The chapter (clip) number is read from the part of `tag` before the first
 * "-". Within that chapter the first timing line whose start field is not
 * earlier than `time` is returned as [tag, start]. When every line starts
 * earlier, the result moves on to the following chapter:
 * [firstTagOfNextChapter, endOfCurrentChapter, startOfNextChapter], or
 * [null, endOfCurrentChapter, 0] when the current chapter is the last one.
 *
 * @param {string} tag - Current tag in "chapter-tag" form.
 * @param {number} time - Current time.
 * @param {string[][]} timings - One array of tab-separated timing lines
 *   (start, end, tag) per chapter.
 * @returns {Array} See above.
 */
function getNextTagChange(tag, time, timings) {
	console.log("getNextTagChange after " + tag + " " + time);
	const [clipText] = tag.split("-");
	const iclip = parseInt(clipText);
	const clipLines = timings[iclip - 1];

	let fields;
	for (const line of clipLines) {
		fields = line.split("\t");
		const begins = parseFloat(fields[0]);
		if (!(time > begins)) {
			return [iclip + "-" + fields[2], begins];
		}
	}

	// Every entry of this clip starts before `time`: use the end of its last entry.
	const clipEnd = parseFloat(fields[1]);
	if (!(iclip < timings.length)) {
		return [null, clipEnd, 0]; // end of last (current) clip in book
	}

	// Hand over to the first entry of the next clip (chapter).
	const upcoming = timings[iclip][0].split("\t");
	const upcomingStart = parseFloat(upcoming[0]);
	const upcomingTag = iclip + 1 + "-" + upcoming[2];
	return [upcomingTag, clipEnd, upcomingStart];
}

export {
	annotateAudio,
	addAudioTags,
	verseToTimingTag,
	timingTagToTime,
	timeToTimingTag,
	getNextTagChange
};
