#! /usr/bin/env python3
"""XML Virtual FileSystem for Midnight Commander

The script requires Midnight Commander 3.1+
(http://www.midnight-commander.org/), Python 2.7+ (http://www.python.org/).

For mc 4.7+ just put the script in $HOME/[.local/share/].mc/extfs.d.
For older versions put it in /usr/[local/][lib|share]/mc/extfs
and add a line "xml" to the /usr/[local/][lib|share]/mc/extfs/extfs.ini.
Make the script executable.

For mc 4.7+ run this "cd" command in the Midnight Commander (in the "bindings"
file the command is "%cd"): cd file/xml://; in older versions it is
cd file#xml, where "file" is the name of your XML file.

See detailed installation instructions at
https://phdru.name/Software/mc/xml_INSTALL.html.

The VFS represents tags as directories; the directories are numbered to
distinguish tags with the same name; numbering also helps to sort tags by their
order in XML instead of sorting them by name and prevents name clash when tag
names coincide with the names of special files used by XML VFS. Attributes,
text nodes and comments are represented as text files; attributes are shown in
a file named "attributes", where they are listed as name=value lines
(I deliberately ignore a small chance of newline characters in values);
names and values are reencoded to the console encoding. Text nodes and comments
are collected in a file named "text", stripped and reencoded. The filesystem is
read-only.

Date/time for all directories/files is set to the last modification time of
the XML file.

Implementation based on minidom doesn't understand namespaces, it just shows
them among other attributes. ElementTree-based implementation doesn't show
namespaces at all. Implementation based on lxml.etree shows namespaces in a
separate file "namespaces".

It is useful to have a top-down view on an XML structure but it's especially
convenient to extract text values from tags. One can get, for example, a
base64-encoded image - just walk down the VFS to the tag's directory and copy
its text file to a real file.

The VFS was inspired by a FUSE xmlfs: https://github.com/halhen/xmlfs

"""
from __future__ import print_function

# Script metadata; version, author and copyright are interpolated into the
# usage message printed when the script is started with too few arguments.
__version__ = "1.2.1.post1"
__author__ = "Oleg Broytman <phd@phdru.name>"
__copyright__ = "Copyright (C) 2013-2025 PhiloSoft Design"
__license__ = "GPL"

# Can be None for default choice, 'lxml', 'elementtree' or 'minidom'.
# The value is consulted by build_xmlvfs() below.
force_implementation = None

# Availability flags for the three backends.  use_lxml and use_elementtree
# start out False and are switched to True further down when the respective
# import succeeds.
# NOTE(review): use_minidom is not read anywhere in this file as far as can be
# seen; minidom is imported unconditionally.
use_minidom = True
use_elementtree = False
use_lxml = False

import math
from os.path import getmtime
import sys
from time import localtime
import xml.dom.minidom

# lxml is optional: flag its presence so that the lxml-based backend is only
# defined and chosen when the library can actually be imported.
try:
    import lxml.etree as etree
    use_lxml = True
except ImportError:
    pass

try:
    import locale
    use_locale = True
except ImportError:
    use_locale = False

# Determine default_encoding: the charset used to encode everything the
# script prints.  It must always end up being a non-empty codec name.
if use_locale:
    # Get the default charset.
    try:
        if sys.version_info[:2] < (3, 11):
            lcAll = locale.getdefaultlocale()
        else:
            # getdefaultlocale() is deprecated since Python 3.11;
            # go straight to getpreferredencoding() below.
            lcAll = []
    except (locale.Error, ValueError) as err:
        # getdefaultlocale() raises ValueError for locale names it cannot
        # parse; treat that like any other locale problem.
        print("WARNING:", err, file=sys.stderr)
        lcAll = []

    # getdefaultlocale() returns (None, None) when no locale is configured
    # (e.g. LANG=C), so the encoding part must be checked for emptiness too.
    if len(lcAll) == 2 and lcAll[1]:
        default_encoding = lcAll[1]
    else:
        try:
            default_encoding = locale.getpreferredencoding()
        except locale.Error as err:
            print("WARNING:", err, file=sys.stderr)
            default_encoding = sys.getdefaultencoding()
        if not default_encoding:
            default_encoding = sys.getdefaultencoding()
else:
    default_encoding = sys.getdefaultencoding()

import logging
# Diagnostics are logged to stderr; stdout is what output() writes the
# VFS listing to, so the two streams stay separate.
logger = logging.getLogger('xml-mcextfs')
log_err_handler = logging.StreamHandler(sys.stderr)
logger.addHandler(log_err_handler)
# INFO and above are emitted; the code below logs at critical/exception level.
logger.setLevel(logging.INFO)

# The script needs at least a command (sys.argv[1]) and the name of the XML
# file (sys.argv[2]); with fewer arguments print a short notice and give up.
if len(sys.argv) <= 2:
    usage_message = """\
XML Virtual FileSystem for Midnight Commander version %s
Author: %s
%s

This is not a program. Put the script in $HOME/[.local/share/].mc/extfs.d or
/usr/[local/][lib|share]/mc/extfs. For more information read the source!"""
    logger.critical(usage_message, __version__, __author__, __copyright__)
    raise SystemExit(1)


# Switch to the user's locale.  The call is guarded the same way as the
# encoding detection above: the locale module may be missing (use_locale is
# False) and an unsupported locale setting raises locale.Error; neither
# should prevent the VFS from working, so only a warning is printed.
if use_locale:
    try:
        locale.setlocale(locale.LC_ALL, '')
    except locale.Error as err:
        print("WARNING:", err, file=sys.stderr)

PY3 = sys.version_info[0] >= 3

if not PY3:
    def output(s):
        """Write one line of the listing to stdout (Python 2 variant)."""
        sys.stdout.write(s + '\n')

    # The ElementTree backend is only made available under Python 2.
    try:
        import xml.etree.ElementTree as ET
        use_elementtree = True
    except ImportError:
        pass
else:
    def output(s):
        """Write one line of the listing to stdout as bytes.

        The text is encoded to default_encoding; characters that cannot be
        represented are replaced instead of raising an error.
        """
        encoded = s.encode(default_encoding, 'replace')
        sys.stdout.buffer.write(encoded + b'\n')


class XmlVfs(object):
    """Abstract base class for the XML VFS backends.

    The class implements the listing and the lookup of tags by number.
    Subclasses provide the parser-specific hooks used here: parse(),
    getroot(), get_root_comments(), getchildren(node), istag(node),
    gettag(node), getlocalname(name), getattrs(node), collect_text(node)
    and, when supports_namespaces is true, has_ns(node) and ns2text(node).
    """

    supports_namespaces = False

    def __init__(self):
        """Remember the XML file name (sys.argv[2]) and parse the file."""
        self.xml_file = sys.argv[2]
        self.parse()

    def list(self):
        """Print the listing of the entire VFS.

        Every entry carries the modification time of the XML file. A
        top-level "text" file is listed when the document has root-level
        comments; after that all tags are listed recursively.
        """
        Y, m, d, H, M = localtime(getmtime(self.xml_file))[0:5]
        self.xml_file_dt = "%02d-%02d-%d %02d:%02d" % (m, d, Y, H, M)

        root_comments = self.get_root_comments()
        if root_comments:
            output("-r--r--r-- 1 user group %d %s text" % (
                len(root_comments), self.xml_file_dt))

        self._list(self.getroot())

    def _list(self, node, path=''):
        """Recursively print the entries for the tags below `node`.

        `path` is the VFS path of `node` ('' for the root). Every child tag
        becomes a directory named "<number> <tag>"; its attributes,
        namespaces and text become files inside that directory.
        """
        tags = [element for element in self.getchildren(node)
                if self.istag(element)]
        # Zero-pad the numbers to the width of the largest one so that
        # sorting by name keeps the document order. Only tags are numbered,
        # so only tags (not text or comment nodes) determine the width.
        template = "%%0%dd" % len(str(len(tags)))

        for n, element in enumerate(tags, 1):
            tag = self.getlocalname(self.gettag(element))
            if path:
                subpath = '%s/%s %s' % (path, template % n, tag)
            else:
                subpath = '%s %s' % (template % n, tag)
            output("dr-xr-xr-x 1 user group 0 %s %s" % (
                self.xml_file_dt, subpath))
            if self.getattrs(element):
                attr_text = self.attrs2text(element)
                output("-r--r--r-- 1 user group %d %s %s/attributes" % (
                    len(attr_text), self.xml_file_dt, subpath))
            if self.supports_namespaces and self.has_ns(element):
                ns_text = self.ns2text(element)
                output("-r--r--r-- 1 user group %d %s %s/namespaces" % (
                    len(ns_text), self.xml_file_dt, subpath))
            text = self.collect_text(element)
            if text:
                output("-r--r--r-- 1 user group %d %s %s/text" % (
                    len(text), self.xml_file_dt, subpath))
            self._list(element, subpath)

    def get_tag_node(self, node, i):
        """Return the i-th (1-based) tag among the children of `node`.

        Non-tag children are not counted. If there are fewer than `i`
        tags, the error is reported via xml_error(), which exits.
        """
        n = 0
        for element in self.getchildren(node):
            if self.istag(element):
                n += 1
                if n == i:
                    return element
        xml_error('There are less than %d nodes' % i)

    def attrs2text(self, node):
        """Return the attributes of `node` as newline-separated name=value."""
        return '\n'.join(
            "%s=%s" % (self.getlocalname(name), value)
            for name, value in self.getattrs(node))

    def has_ns(self, node):
        """Tell whether `node` declares namespaces; never in the base class."""
        return False


class MiniDOMXmlVfs(XmlVfs):
    """XML VFS backend built on xml.dom.minidom."""

    def parse(self):
        """Parse the XML file into a minidom document."""
        self.document = xml.dom.minidom.parse(self.xml_file)

    def getattrs(self, node):
        """Return the attributes of `node` as a list of (name, value)."""
        attrs = node.attributes
        if attrs is None:
            # Only elements carry attributes; other nodes (e.g. the
            # document itself) have attributes set to None.
            return []
        items = [attrs.item(i) for i in range(attrs.length)]
        return [(a.name, a.value) for a in items]

    def collect_text(self, node):
        """Return text and comments directly below `node`, one per line.

        Text and CDATA sections are stripped; empty pieces are dropped.
        Comments are wrapped in <!-- -->. Child tags, processing
        instructions and the DOCTYPE declaration are skipped. Any other
        node type is reported via xml_error(), which exits.
        """
        text_accumulator = []
        for element in node.childNodes:
            if element.localName:
                continue  # a tag; it gets a directory of its own
            node_type = element.nodeType
            if node_type == element.COMMENT_NODE:
                text = u"<!--%s-->" % element.nodeValue
            elif node_type in (element.TEXT_NODE,
                               element.CDATA_SECTION_NODE):
                # CDATA sections are character data just like text nodes.
                text = element.nodeValue.strip()
            elif node_type in (element.PROCESSING_INSTRUCTION_NODE,
                               element.DOCUMENT_TYPE_NODE):
                # Legitimate parts of a document that are neither text nor
                # comments; they must not abort the whole listing.
                continue
            else:
                xml_error("Unknown node type %d" % element.nodeType)
            if text:
                text_accumulator.append(text)
        return '\n'.join(text_accumulator)

    def getroot(self):
        """Return the document node; its children are the top level."""
        return self.document

    def get_root_comments(self):
        """Return the comments found at the document level."""
        return self.collect_text(self.document)

    def getchildren(self, node):
        """Return all child nodes of `node`, tags and non-tags alike."""
        return node.childNodes

    def gettag(self, node):
        """Return the local name of a tag node."""
        return node.localName

    def istag(self, node):
        """Tell whether `node` is a tag, i.e. has a non-empty local name."""
        return bool(node.localName)

    def getlocalname(self, name):
        """Return `name` unchanged; minidom names need no processing here."""
        return name


if use_elementtree or use_lxml:
    # basestring exists only in Python 2; Python 3 has just str.
    try:
        _string_types = basestring
    except NameError:
        _string_types = str

    class CommonEtreeXmlVfs(XmlVfs):
        """Code shared by the ElementTree and lxml.etree backends."""

        def getattrs(self, node):
            """Return the attributes of `node` as (name, value) pairs."""
            return node.attrib.items()

        def collect_text(self, node):
            """Return text and comments directly inside `node`.

            The pieces are returned one per line in document order: the
            text before the first child, then for every child its rendering
            as <!-- --> if it is not a tag, followed by the text after that
            child. Text is stripped and empty pieces are dropped.
            """
            text_accumulator = []
            if node.text:
                text = node.text.strip()
                if text:
                    text_accumulator.append(text)
            for element in node:
                if not self.istag(element):
                    text = u"<!--%s-->" % element.text
                    text_accumulator.append(text)
                # The text that follows a child inside `node` is stored in
                # the child's tail. The tail of `node` itself belongs to
                # the parent of `node` and is therefore not included.
                if element.tail:
                    text = element.tail.strip()
                    if text:
                        text_accumulator.append(text)
            return '\n'.join(text_accumulator)

        def getchildren(self, node):
            """Return the children of `node` as a list."""
            return list(node)

        def gettag(self, node):
            """Return the (possibly namespace-qualified) tag of `node`."""
            return node.tag

        def istag(self, node):
            """Tell whether `node` is a tag, i.e. its tag is a string."""
            return isinstance(node.tag, _string_types)


if use_elementtree:
    class ElementTreeXmlVfs(CommonEtreeXmlVfs):
        """XML VFS backend built on xml.etree.ElementTree."""

        def parse(self):
            """Parse the XML file, keeping comments and PIs in the tree."""
            # Copied from http://effbot.org/zone/element-pi.htm

            class PIParser(ET.XMLTreeBuilder):
                """Tree builder that records comments and PIs as elements.

                Everything parsed is placed inside an extra "document"
                element that is opened in __init__ and closed in close().
                """

                def __init__(self):
                    ET.XMLTreeBuilder.__init__(self)
                    # assumes ElementTree 1.2.X
                    # (the code relies on the private _parser and _target
                    # attributes of the builder)
                    self._parser.CommentHandler = self.handle_comment
                    self._parser.ProcessingInstructionHandler = self.handle_pi
                    self._target.start("document", {})

                def close(self):
                    # Close the extra "document" element opened in __init__
                    # before the builder is finalized.
                    self._target.end("document")
                    return ET.XMLTreeBuilder.close(self)

                def handle_comment(self, data):
                    # Store the comment as an element tagged ET.Comment
                    # with the comment text as its data.
                    self._target.start(ET.Comment, {})
                    self._target.data(data)
                    self._target.end(ET.Comment)

                def handle_pi(self, target, data):
                    # Store the processing instruction as an element tagged
                    # ET.PI whose data is "<target> <data>".
                    self._target.start(ET.PI, {})
                    self._target.data(target + " " + data)
                    self._target.end(ET.PI)

            self.document = ET.parse(self.xml_file, PIParser())

        def getroot(self):
            """Return the root element of the parsed tree."""
            # NOTE(review): with PIParser this is presumably the extra
            # "document" element, so the real root tag is one of its
            # children - confirm against the ElementTree version in use.
            return self.document.getroot()

        def get_root_comments(self):
            """Return the non-tag children of the root, one per line."""
            text_accumulator = []
            for element in self.getroot():
                if not self.istag(element):
                    text = u"<!--%s-->" % element.text
                    text_accumulator.append(text)
            return '\n'.join(text_accumulator)

        def getlocalname(self, name):
            """Return `name` without a leading "{namespace}" part."""
            if name.startswith('{'):
                name = name.split('}', 1)[1]  # Remove XML namespace
            return name


if use_lxml:
    class LxmlEtreeXmlVfs(CommonEtreeXmlVfs):
        """XML VFS backend built on lxml.etree; it also shows namespaces."""

        supports_namespaces = True

        def parse(self):
            """Parse the XML file with lxml.etree."""
            self.document = etree.parse(self.xml_file)

        def getroot(self):
            """Return a one-element list holding the root tag.

            The list plays the role of the parent of the root tag, so the
            root tag itself is listed as the first directory.
            """
            return [self.document.getroot()]

        def get_root_comments(self):
            """Return the comments around the root tag, one per line.

            Comments before and after the root tag are returned in document
            order.
            """
            root = self.document.getroot()
            # With preceding=True the siblings are yielded starting right
            # before the root and going backwards, hence the reversal.
            comments = list(
                root.itersiblings(tag=etree.Comment, preceding=True))
            comments.reverse()
            comments.extend(root.itersiblings(tag=etree.Comment))
            return '\n'.join(
                u"<!--%s-->" % comment.text for comment in comments)

        def getlocalname(self, name):
            """Return `name` without its namespace part."""
            return etree.QName(name).localname

        def _get_local_ns(self, node):
            """Return the prefix->URI map that `node` adds to its parent's.

            Mappings that the parent already has with the same URI are
            left out; for a node without a parent the whole map is
            returned.
            """
            this_nsmap = dict(node.nsmap)
            parent = node.getparent()
            if parent is not None:
                for prefix, uri in parent.nsmap.items():
                    if this_nsmap.get(prefix) == uri:
                        del this_nsmap[prefix]
            return this_nsmap

        def has_ns(self, node):
            """Tell whether `node` adds namespace mappings of its own."""
            return bool(self._get_local_ns(node))

        def ns2text(self, node):
            """Return the namespaces of `node` as name=value lines.

            The default namespace (empty prefix) is shown as "xmlns".
            """
            ns_accumulator = []
            for name, value in self._get_local_ns(node).items():
                if not name:
                    name = 'xmlns'
                ns_accumulator.append("%s=%s" % (name, value))
            return '\n'.join(ns_accumulator)


def build_xmlvfs():
    """Create the VFS object for the configured implementation.

    With force_implementation set to None the best available backend is
    used: lxml, then ElementTree, then minidom. Otherwise the named backend
    is used.

    Raises ImportError if the forced backend is not available and
    ValueError if force_implementation has an unknown value.
    """
    if force_implementation is None:
        if use_lxml:
            return LxmlEtreeXmlVfs()
        if use_elementtree:
            return ElementTreeXmlVfs()
        return MiniDOMXmlVfs()

    if force_implementation == 'minidom':
        return MiniDOMXmlVfs()

    # The classes of the optional backends are only defined when the
    # backend is available; check the flag first to report a clear error
    # instead of a NameError.
    if force_implementation == 'elementtree':
        if not use_elementtree:
            raise ImportError(
                'Implementation "elementtree" is not available')
        return ElementTreeXmlVfs()

    if force_implementation == 'lxml':
        if not use_lxml:
            raise ImportError('Implementation "lxml" is not available')
        return LxmlEtreeXmlVfs()

    raise ValueError('Unknown implementation "%s", expected "minidom", '
                     '"elementtree" or "lxml"' % force_implementation)


def mcxml_list():
    """Handle the "list" command: print the listing of the whole VFS."""
    build_xmlvfs().list()


def mcxml_copyout():
    """Extract a file from the VFS

    The path inside the VFS is taken from sys.argv[3] and the name of the
    real file to write from sys.argv[4]. Directory components look like
    "<number> <tag>"; only the number is used to find the tag. The last
    component must be one of the special files "attributes", "namespaces"
    or "text". Any problem with the path is reported via xml_error(), which
    exits.
    """

    xmlvfs = build_xmlvfs()
    xml_filename = sys.argv[3]
    real_filename = sys.argv[4]

    node = xmlvfs.getroot()
    for path_comp in xml_filename.split('/'):
        if ' ' in path_comp:
            try:
                i = int(path_comp.split(' ', 1)[0])
            except ValueError:
                # Not a "<number> <tag>" directory name.
                xml_error('Unknown file')
            node = xmlvfs.get_tag_node(node, i)
        elif path_comp in ('attributes', 'namespaces', 'text'):
            break
        else:
            xml_error('Unknown file')

    if path_comp == 'attributes':
        if xmlvfs.getattrs(node):
            text = xmlvfs.attrs2text(node)
        else:
            xml_error('There are no attributes')

    elif path_comp == 'namespaces':
        if xmlvfs.supports_namespaces and xmlvfs.has_ns(node):
            text = xmlvfs.ns2text(node)
        else:
            xml_error('There are no namespaces')

    elif path_comp == 'text':
        # A "text" file without a directory part is the top-level file
        # that holds the root comments.
        if '/' in xml_filename:
            text = xmlvfs.collect_text(node)
        else:
            text = xmlvfs.get_root_comments()

    else:
        # The path ended with a directory, not with a special file.
        xml_error('Unknown file')

    # The with statement closes the file even if writing fails.
    with open(real_filename, 'w') as outfile:
        outfile.write(text)


def mcxml_copyin():
    """Handle the "copyin" command: always refuse, the VFS is read-only."""
    message = "XML VFS doesn't support adding files (read-only filesystem)"
    raise SystemExit(message)


def mcxml_rm():
    """Handle the "rm" command: always refuse, the VFS is read-only."""
    message = ("XML VFS doesn't support removing files/directories "
               "(read-only filesystem)")
    raise SystemExit(message)


# Removing a directory is refused in exactly the same way.
mcxml_rmdir = mcxml_rm


def mcxml_mkdir():
    """Handle the "mkdir" command: always refuse, the VFS is read-only."""
    message = ("XML VFS doesn't support creating directories "
               "(read-only filesystem)")
    raise SystemExit(message)


def xml_error(error_str):
    """Log a problem found while walking the XML file and exit with 1."""
    logger.critical("Error walking XML file: %s", error_str)
    raise SystemExit(1)

# Dispatch: the command given in sys.argv[1] is handled by the module-level
# function named "mcxml_<command>".
command = sys.argv[1]
procname = "mcxml_" + command

g = globals()
if procname not in g:
    logger.critical("Unknown command %s", command)
    sys.exit(1)

try:
    g[procname]()
except SystemExit:
    # Deliberate exits (error messages, exit codes) pass through unchanged.
    raise
except Exception:
    logger.exception("Error during run")
    # Exit with a non-zero status so that the caller can see that the
    # command has failed.
    sys.exit(1)
