/**
 * Extracts the first (top-most) message from an HTML email by stripping quoted
 * replies, reply-attribution lines ("On ... wrote:"), images, style tags and
 * element attributes.
 *
 * Requires a browser-like environment that provides the `DOMParser`,
 * `NodeFilter` and `Node` globals.
 *
 * NOTE(review): this is a content cleaner, not a security sanitizer. `href`
 * values on links are kept verbatim and tags not listed below are preserved,
 * so untrusted output should still be sanitized before being injected into a
 * page.
 *
 * @param {string} htmlContent - The raw HTML email content.
 * @returns {string} The cleaned, trimmed HTML of the parsed document's body,
 *   or an empty string when `htmlContent` is falsy.
 */
export function extractEmailContent(htmlContent) {
  if (!htmlContent) return "";

  const doc = new DOMParser().parseFromString(htmlContent, "text/html");

  // Selectors that indicate quoted content, signatures or presentation-only markup.
  const selectorsToRemove = [
    ".gmail_quote",
    ".gmail_attr",
    ".yahoo_quoted",
    ".OutlookMessageHeader",
    "blockquote",
    '[data-marker="__QUOTED_TEXT__"]',
    "img", // Usually tracking pixels or signature logos
    "style",
  ];
  for (const el of doc.querySelectorAll(selectorsToRemove.join(","))) {
    el.remove();
  }

  // Strip "On [date] ... wrote:" attribution lines that live inside a single text node.
  const attributionPattern = /On .+wrote:/;

  // The walker is created from the parsed document itself so the function does
  // not depend on an ambient global `document`.
  const walker = doc.createTreeWalker(doc.body, NodeFilter.SHOW_TEXT);

  // Collect first, mutate afterwards: removing nodes while walking would
  // disturb the traversal.
  const textNodes = [];
  for (let node = walker.nextNode(); node; node = walker.nextNode()) {
    textNodes.push(node);
  }

  for (const node of textNodes) {
    const text = node.textContent;
    const match = text.match(attributionPattern);
    if (!match) continue;

    if (text.trim() === match[0]) {
      // The node holds nothing but the attribution line.
      node.remove();
    } else {
      // Keep the surrounding text, drop only the attribution part.
      node.textContent = text.replace(attributionPattern, "");
    }
  }

  // Drop <br> tags that directly precede another element (ignoring
  // whitespace-only text in between). A <br> followed by real text is kept;
  // removing it would fuse two lines of text together.
  for (const br of doc.querySelectorAll("br")) {
    let next = br.nextSibling;
    while (
      next &&
      next.nodeType === Node.TEXT_NODE &&
      next.textContent.trim() === ""
    ) {
      next = next.nextSibling;
    }
    if (next && next.nodeType === Node.ELEMENT_NODE) {
      br.remove();
    }
  }

  // Remove every attribute except `href` on <a> elements.
  for (const el of doc.querySelectorAll("*")) {
    const isLink = el.tagName.toLowerCase() === "a";
    // Snapshot the live attribute map before mutating it.
    for (const { name } of Array.from(el.attributes)) {
      if (!(isLink && name === "href")) {
        el.removeAttribute(name);
      }
    }
  }

  // Best-effort final pass: catches a trailing attribution that spans several
  // nodes (e.g. contains a link) and was therefore missed by the text-node pass.
  return doc.body.innerHTML
    .trim()
    .replace(/On .+wrote:$/, "")
    .trim();
}