from __future__ import absolute_import

import os
import re


def _canonicalize_image_url(url):
    try:
        s = (url or '').strip()
    except Exception:
        s = ''
    if not s:
        return ''

    # Strip query/fragment
    try:
        from urllib.parse import urlparse

        p = urlparse(s)
        s = p._replace(query='', fragment='').geturl()
    except Exception:
        pass

    # Strip common WordPress size suffix: -300x200.jpg
    try:
        s = re.sub(r'(-\d+x\d+)(?=\.[a-z0-9]+$)', '', s, count=1, flags=re.IGNORECASE)
    except Exception:
        pass

    return s


def append_enclosure_images(summary_html, enclosure_urls, base_url=None, sanitize_url=None, max_images=8):
    """Append enclosure ``<img>`` tags unless an equivalent image already exists.

    Every enclosure URL is stripped, resolved against ``base_url`` (when
    given) and passed through ``sanitize_url`` (when given).  Entries that
    are not strings, are blank, or for which the sanitizer returns nothing
    usable are skipped.  Surviving URLs are HTML-escaped before being placed
    in the ``src`` attribute, so a URL cannot break out of the attribute.

    When lxml is importable, images already present in ``summary_html`` are
    compared via ``_canonicalize_image_url``.  Without lxml the check falls
    back to a plain substring search of the summary.

    Args:
        summary_html: Existing HTML fragment; ``None`` is treated as ``''``.
        enclosure_urls: Iterable of candidate image URLs (may be ``None``).
        base_url: Optional base used to resolve relative URLs.
        sanitize_url: Optional callable applied to each resolved URL.  If it
            raises, the unsanitized URL is kept (best effort).
        max_images: Maximum number of images to append; ``None`` disables
            the limit.

    Returns:
        ``summary_html`` (or ``''``) followed by one
        ``<div><img src="..."/></div>`` per appended image.
    """
    from html import escape as _escape

    summary = summary_html or ''

    try:
        from urllib.parse import urljoin
    except Exception:
        urljoin = None

    def _normalize(raw):
        """Strip, resolve and sanitize *raw*; return '' if nothing usable remains."""
        if not isinstance(raw, str):
            return ''
        url = raw.strip()
        if not url:
            return ''
        if urljoin is not None and base_url:
            try:
                url = urljoin(base_url, url)
            except Exception:
                pass  # keep the unresolved URL
        if sanitize_url is not None:
            try:
                url = sanitize_url(url)
            except Exception:
                pass  # best effort: a failing sanitizer must not abort the append
        # A sanitizer that returns None/'' (or a non-string) rejects the URL.
        if not isinstance(url, str):
            return ''
        return url.strip()

    try:
        from lxml import html as _lhtml
    except Exception:
        _lhtml = None

    to_add = []

    if _lhtml is None:
        # Fallback: best-effort string contains check against the summary.
        seen = set()
        for raw in (enclosure_urls or []):
            url = _normalize(raw)
            if not url or url in seen:
                continue
            seen.add(url)
            # The summary may hold the URL as given, resolved, or entity-escaped.
            forms = (raw.strip(), url, _escape(url, quote=True))
            if any(form in summary for form in forms):
                continue
            to_add.append(url)
    else:
        try:
            frags = _lhtml.fragments_fromstring(summary)
        except Exception:
            try:
                frags = [_lhtml.fragment_fromstring(summary, create_parent='div')]
            except Exception:
                frags = []

        # Canonical keys of images already present in the summary.
        seen = set()
        for node in frags:
            imgs = []
            if getattr(node, 'tag', None) == 'img':
                imgs.append(node)
            try:
                imgs.extend(node.xpath('.//img'))
            except Exception:
                # Fragments that are not elements have no xpath().
                pass
            for img in imgs:
                try:
                    src = img.get('src')
                except Exception:
                    src = None
                key = _canonicalize_image_url(_normalize(src))
                if key:
                    seen.add(key)

        for raw in (enclosure_urls or []):
            url = _normalize(raw)
            if not url:
                continue
            # Fall back to the URL itself when canonicalization leaves nothing
            # (e.g. a query-only reference), so it is still de-duplicated.
            key = _canonicalize_image_url(url) or url
            if key in seen:
                continue
            seen.add(key)
            to_add.append(url)

    if max_images is not None:
        to_add = to_add[:max(0, max_images)]
    if not to_add:
        return summary

    suffix = ''.join('<div><img src="%s"/></div>' % _escape(u, quote=True) for u in to_add)
    return summary + suffix


def _write_text(path, data):
    parent = os.path.dirname(path)
    if parent and not os.path.isdir(parent):
        os.makedirs(parent)
    with open(path, 'w', encoding='utf-8') as f:
        f.write(data)


def _make_xhtml(title, body_html, extra_css=''):
    title = title or ''
    extra_css = extra_css or ''
    head_css = ''
    if extra_css.strip():
        head_css = '<style type="text/css">\n%s\n</style>' % extra_css
    return (
        '<html><head><meta charset="utf-8"/>'
        '<title>%s</title>%s</head><body>%s</body></html>'
        % (title, head_css, body_html or '')
    )


# Stylesheet passed as ``extra_css`` to ``_make_xhtml`` for the pages written
# by ``build_oeb_periodical``: caps image width at 95% and lets height and
# width scale automatically, overriding inline sizes via ``!important``.
_DEFAULT_EXPORT_CSS = """
img{max-width:95% !important; height:auto !important; width:auto !important;}
"""


def _relativize_article_images(body_html, feed_idx, item_idx):
    try:
        from lxml import html as _lhtml
    except Exception:
        # Best-effort string rewrite
        prefix = 'feed_%d/article_%d/images/' % (feed_idx, item_idx)
        return (body_html or '').replace(prefix, 'images/')

    try:
        frags = _lhtml.fragments_fromstring(body_html or '')
    except Exception:
        try:
            frags = [_lhtml.fragment_fromstring(body_html or '', create_parent='div')]
        except Exception:
            return body_html or ''

    prefix = ('feed_%d/article_%d/images/' % (feed_idx, item_idx)).replace('\\', '/')
    for node in frags:
        try:
            imgs = []
            if getattr(node, 'tag', None) == 'img':
                imgs.append(node)
            imgs.extend(list(node.xpath('.//img') or []))
        except Exception:
            imgs = []
        for img in imgs:
            try:
                src = (img.get('src') or '').strip().replace('\\', '/')
            except Exception:
                src = ''
            if not src:
                continue
            if src.startswith(prefix):
                try:
                    img.set('src', 'images/' + src[len(prefix):])
                except Exception:
                    pass
    try:
        return ''.join(_lhtml.tostring(x, encoding='unicode') for x in frags)
    except Exception:
        return body_html or ''


def _oeb_feed_title(feed):
    """Return the feed's title, or ``'Section'`` when it is missing or empty."""
    return feed.get('title') or 'Section'


def _oeb_item_title(item):
    """Return the article's title, or ``'(untitled)'`` when it is missing or empty."""
    return item.get('title') or '(untitled)'


def _oeb_article_body(feed_idx, item_idx, item, total, with_nav):
    """Return the body markup of one article page.

    ``total`` is the number of articles in the feed; it decides whether a
    "Next" link is emitted.  Navigation is only included when ``with_nav``.
    """
    published = item.get('published') or ''
    body_html = _relativize_article_images(item.get('body_html') or '', feed_idx, item_idx)
    try:
        link = str(item.get('link') or '').strip()
    except Exception:
        link = ''

    parts = ['<h2 id="item-%d-%d">%s</h2>' % (feed_idx + 1, item_idx + 1, _oeb_item_title(item))]
    if link:
        parts.append('<p><a href="%s">%s</a></p>' % (link, link))
    if published:
        parts.append('<p><em>%s</em></p>' % published)
    parts.append('<div>%s</div>' % (body_html,))
    if link:
        parts.append(
            '<p>This article was downloaded by RSS Reader Plugin for calibre from <a href="%s">%s</a></p>'
            % (link, link)
        )
    else:
        parts.append('<p>This article was downloaded by RSS Reader Plugin for calibre</p>')

    if with_nav:
        nav_links = []
        if item_idx + 1 < total:
            nav_links.append('<a href="../article_%d/index.html">Next</a>' % (item_idx + 1))
        nav_links.append('<a href="../index.html">Section menu</a>')
        nav_links.append('<a href="../../index.html">Main menu</a>')
        if item_idx > 0:
            nav_links.append('<a href="../article_%d/index.html">Previous</a>' % (item_idx - 1))
        parts.append('<hr/>')
        parts.append('<p>| %s |</p>' % (' | '.join(nav_links)))

    return '\n'.join(parts)


def build_oeb_periodical(td, news_title, feeds):
    """Create a Calibre-style periodical directory with per-article XHTML + index.opf.

    `feeds` is a list of dicts: {title: str, items: [ {title, link, published, body_html} ]}

    Layout written under ``td``::

        index.html                        (omitted for single-article exports)
        feed_<i>/index.html               (omitted for single-article exports)
        feed_<i>/article_<j>/index.html
        index.opf, index.ncx

    When the feeds contain exactly one article in total, only that article
    page is written and it is the only spine and ToC entry.

    NOTE(review): titles, links and publication dates are interpolated into
    the markup without escaping; presumably callers supply HTML-safe values.
    Confirm against the callers.

    Returns:
        The path ``os.path.join(td, 'index.opf')``.

    Raises:
        RuntimeError: If the calibre OPF helpers cannot be imported.
        ValueError: If ``feeds`` is empty.
    """

    try:
        from calibre.ebooks.metadata import MetaInformation
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.toc import TOC
    except Exception as err:
        raise RuntimeError('Calibre OPF helpers are not available') from err

    feeds = list(feeds or [])
    if not feeds:
        raise ValueError('No feeds to export')

    try:
        total_articles = sum(len(f.get('items') or []) for f in feeds)
    except Exception:
        total_articles = 0
    single_article_mode = total_articles == 1
    multi_feed = len(feeds) > 1

    # Root index (skip for single-article exports to avoid extra spine/pages)
    if not single_article_mode:
        root_parts = ['<h1 id="index">%s</h1>' % (news_title or '')]
        if multi_feed:
            root_parts.append('<ul>')
            root_parts.extend(
                '<li><a href="feed_%d/index.html">%s</a></li>' % (i, _oeb_feed_title(f))
                for i, f in enumerate(feeds)
            )
            root_parts.append('</ul>')
        else:
            root_parts.append('<p><a href="feed_0/index.html">%s</a></p>' % _oeb_feed_title(feeds[0]))
        _write_text(
            os.path.join(td, 'index.html'),
            _make_xhtml(news_title, '\n'.join(root_parts), extra_css=_DEFAULT_EXPORT_CSS),
        )

    # Per-feed and per-article files
    for feed_idx, feed in enumerate(feeds):
        feed_dir = os.path.join(td, 'feed_%d' % feed_idx)
        items = list(feed.get('items') or [])

        if not single_article_mode:
            feed_title = _oeb_feed_title(feed)
            index_parts = ['<h2 id="feed-%d">%s</h2>' % (feed_idx + 1, feed_title), '<ul>']
            index_parts.extend(
                '<li><a href="article_%d/index.html">%s</a></li>' % (item_idx, _oeb_item_title(it))
                for item_idx, it in enumerate(items)
            )
            index_parts.append('</ul>')
            index_parts.append('<p><a href="../index.html">Main menu</a></p>')
            _write_text(
                os.path.join(feed_dir, 'index.html'),
                _make_xhtml(feed_title, '\n'.join(index_parts), extra_css=_DEFAULT_EXPORT_CSS),
            )

        for item_idx, it in enumerate(items):
            page_body = _oeb_article_body(
                feed_idx, item_idx, it, len(items), with_nav=not single_article_mode
            )
            _write_text(
                os.path.join(feed_dir, 'article_%d' % item_idx, 'index.html'),
                _make_xhtml(_oeb_item_title(it), page_body, extra_css=_DEFAULT_EXPORT_CSS),
            )

    # Build OPF/NCX with a stable spine and ToC
    mi = MetaInformation(news_title or 'RSS Export', ['RSS Reader Plugin'])
    mi.publisher = 'calibre'
    opf_path = os.path.join(td, 'index.opf')
    ncx_path = os.path.join(td, 'index.ncx')

    opf = OPFCreator(td, mi)

    if single_article_mode:
        # Minimal manifest to avoid calibre adding non-linear index pages:
        # just the NCX plus the article directory (HTML + images).
        manifest = [ncx_path]
        for i, f in enumerate(feeds):
            for j, _it in enumerate(f.get('items') or []):
                manifest.append(os.path.join(td, 'feed_%d' % i, 'article_%d' % j))
    else:
        manifest = [os.path.join(td, 'index.html'), ncx_path]
        manifest.extend(os.path.join(td, 'feed_%d' % i) for i in range(len(feeds)))
    opf.create_manifest_from_files_in(manifest)

    # Give the NCX manifest entry the id "ncx" (best effort).
    try:
        for mani in opf.manifest:
            if getattr(mani, 'path', '').endswith('.ncx'):
                mani.id = 'ncx'
    except Exception:
        pass

    toc = TOC(base_path=td)

    spine_entries = []
    if not single_article_mode:
        spine_entries.append('index.html')

    for i, f in enumerate(feeds):
        parent = toc
        if not single_article_mode:
            feed_href = 'feed_%d/index.html' % i
            spine_entries.append(feed_href)
            if multi_feed:
                # Group articles under their feed only when the feed index
                # page actually exists; in single-article mode it is never
                # written, so a feed node would point at a missing file.
                parent = toc.add_item(feed_href, None, _oeb_feed_title(f))
        for j, it in enumerate(f.get('items') or []):
            article_href = 'feed_%d/article_%d/index.html' % (i, j)
            parent.add_item(article_href, None, _oeb_item_title(it))
            spine_entries.append(article_href)

    opf.create_spine([os.path.join(td, p.replace('/', os.sep)) for p in spine_entries])
    opf.set_toc(toc)

    with open(opf_path, 'wb') as opf_file, open(ncx_path, 'wb') as ncx_file:
        opf.render(opf_file, ncx_file)

    return opf_path
