Tell the LingQ dev team to use this code in the select event listener so that characters like `,`, `.` and `?` are captured properly, together with newline characters (`\n`).
They are probably using `DOM.textContent` to get the text.
Original Text
Retrieved Text
Budget airlines don't save you as much as you thinkIn fact, many of them are becoming an outright scamThese companies have admitted to secretly paying their gate agents a bribe to claim passengers' bags are too big, even when they're not, forcing people to pay a ridiculously overpriced baggage feeDo you pay your employees a feeDo you incentivize them to go pick out bags from people getting on planes and kick them off the flightJust yes or no
Text retrieved by my code
Budget airlines don't save you you think.
In fact, many of them are becoming an outright scam.
These companies have admitted to secretly paying their gate agents a bribe to claim passengers ' bags are too big, even when they're not, forcing people to pay a ridiculously overpriced baggage fee.
Do you pay your employees a fee?
Do you incentivize them to go pick out bags from people getting on planes and kick them off the flight?
Just yes or no?
/**
 * Collects the leaf nodes below (and including) `root` in document order.
 *
 * A node counts as a leaf when it is either
 *   - a text node whose content is not purely whitespace, or
 *   - an element / document fragment that has no child nodes at all.
 * Nodes of any other type (comments, etc.) are ignored, as are
 * whitespace-only text nodes.
 *
 * @param {Node} root - Node at which the walk starts.
 * @returns {Node[]} Leaf nodes in depth-first, left-to-right order.
 */
function getAllLeafNodes(root) {
  const collected = [];
  // Explicit stack instead of recursion; the top of the stack is always the
  // next node in document order.
  const pending = [root];

  while (pending.length > 0) {
    const current = pending.pop();
    const kind = current.nodeType;

    if (kind === Node.TEXT_NODE) {
      if (current.textContent.trim() !== "") {
        collected.push(current);
      }
      continue;
    }

    if (kind !== Node.ELEMENT_NODE && kind !== Node.DOCUMENT_FRAGMENT_NODE) {
      continue;
    }

    const kids = current.childNodes;
    if (kids.length === 0) {
      // Childless element/fragment: reported as a leaf itself.
      collected.push(current);
      continue;
    }

    // Push children right-to-left so they are popped left-to-right.
    for (let i = kids.length - 1; i >= 0; i -= 1) {
      pending.push(kids[i]);
    }
  }

  return collected;
}
/**
 * Builds a plain-text string from `domElement`, one line per sentence.
 *
 * Every descendant matching `.sentence` is treated as one sentence; when
 * there are none, `domElement` itself is treated as the only sentence.
 * For each leaf node (see `getAllLeafNodes`) the trimmed text is collected,
 * and a `?` or `.` is appended after it when the leaf's parent element
 * carries the `has-end-punctuation-question` / `has-end-punctuation-period`
 * class. Sentences are separated by `\n`.
 *
 * @param {Element|DocumentFragment} domElement - Container to read from.
 * @returns {string|null} The reconstructed text, or `null` when
 *   `domElement` has no child nodes.
 */
function extractTextFromDOM(domElement) {
  const matchedSentences = domElement.querySelectorAll('.sentence');
  const containers = matchedSentences.length ? matchedSentences : [domElement];

  if (domElement.childNodes.length === 0) return null;

  const tokens = [];
  for (const container of containers) {
    for (const leaf of getAllLeafNodes(container)) {
      const trimmed = leaf.textContent.trim();
      if (trimmed) {
        tokens.push(trimmed);
      }

      // Punctuation is carried as a class on the leaf's parent element.
      const parent = leaf.parentNode;
      if (parent.nodeType === Node.ELEMENT_NODE) {
        if (parent.matches('.has-end-punctuation-question')) {
          tokens.push('?');
        }
        if (parent.matches('.has-end-punctuation-period')) {
          tokens.push('.');
        }
      }
    }
    // Sentence separator; the one after the last sentence is dropped below.
    tokens.push('\n');
  }

  const spaced = tokens.slice(0, -1).join(' ');
  // Remove the single joining space on either side of '?', '.' and '\n'.
  const tightened = spaced.replace(/[^\S\n]?(\?|\.|\n)[^\S\n]?/g, '$1');
  // Remove the single joining space in front of a comma.
  return tightened.replace(/[^\S\n]?(,)/g, '$1');
}
@ecalzolaio I fixed the problem in my script. @zoran see the before/after of the script.