"""The Extractor classes transform content into triples.
"""
# standard imports
import abc
import typing

# bsie imports
from bsie.utils import bsfs, node, ns

# exports
# Public names of this module. SCHEMA_PREAMBLE is not listed here, so a
# star-import will not pick it up; import it by name where needed.
__all__: typing.Sequence[str] = (
    'Extractor',
    )

# constants

# Essential definitions typically used in extractor schemas: prefix
# declarations for common external vocabularies (rdf, rdfs, xsd, schema.org)
# and for the bsfs/bsie namespaces, followed by the basic literal classes,
# the bsa:Feature array class, the bsn:Entity node class, and the xsd
# string/integer/float literals.
# NOTE: This preamble is only for convenience; each Extractor must implement
# its use, if so desired. The Extractor base class in this module does not
# reference it.
SCHEMA_PREAMBLE = '''
    # common external prefixes
    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    prefix xsd: <http://www.w3.org/2001/XMLSchema#>
    prefix schema: <http://schema.org/>

    # common bsfs prefixes
    prefix bsfs: <https://schema.bsfs.io/core/>
    prefix bsl: <https://schema.bsfs.io/core/Literal/>
    prefix bsa: <https://schema.bsfs.io/core/Literal/Array/>
    prefix bsd: <https://schema.bsfs.io/core/distance#>

    prefix bsie: <https://schema.bsfs.io/ie/>
    prefix bsn: <https://schema.bsfs.io/ie/Node/>
    prefix bse: <https://schema.bsfs.io/ie/Node/Entity#>
    prefix bsp: <https://schema.bsfs.io/ie/Node/Preview#>

    # default definitions
    bsl:Array rdfs:subClassOf bsfs:Literal .
    bsl:Number rdfs:subClassOf bsfs:Literal .
    bsl:Time rdfs:subClassOf bsfs:Literal .
    bsa:Feature rdfs:subClassOf bsl:Array ;
        bsfs:dimension "1"^^xsd:integer ;
        bsfs:dtype <https://schema.bsfs.io/core/dtype#f16> ;
        bsfs:distance bsd:euclidean .

    # essential nodes
    bsn:Entity rdfs:subClassOf bsfs:Node .

    # common definitions
    xsd:string rdfs:subClassOf bsfs:Literal .
    xsd:integer rdfs:subClassOf bsl:Number .
    xsd:float rdfs:subClassOf bsl:Number .

    '''


## code ##

class Extractor(abc.ABC):
    """Abstract base for components that turn content into triples.

    An Extractor produces (subject, predicate, value)-triples from some
    content. Its principal predicates provide information about the
    content itself (i.e., they occur in triples that include the subject).
    If an extracted value is a node itself, the Extractor may also
    generate triples with auxiliary predicates.
    """

    # the kind of content this extractor expects (i.e. reader subclass);
    # defaults to None.
    CONTENT_READER: typing.Optional[str] = None

    # the schema this extractor was constructed with.
    _schema: bsfs.schema.Schema

    def __init__(self, schema: bsfs.schema.Schema):
        """Create an extractor that operates on *schema*."""
        self._schema = schema

    def __str__(self) -> str:
        """Return the extractor's type name."""
        return bsfs.typename(self)

    def __repr__(self) -> str:
        """Return the extractor's type name followed by empty parentheses."""
        type_name = bsfs.typename(self)
        return f'{type_name}()'

    def __eq__(self, other: typing.Any) -> bool:
        """Compare by type, expected content reader, and schema.

        *other* must be an instance of this extractor's type; anything
        else compares unequal.
        """
        if not isinstance(other, type(self)):
            return False
        if not self.CONTENT_READER == other.CONTENT_READER:
            return False
        return self.schema == other.schema

    def __hash__(self) -> int:
        """Hash over the same fields that take part in equality."""
        identity = (type(self), self.CONTENT_READER, self.schema)
        return hash(identity)

    @property
    def schema(self) -> bsfs.schema.Schema:
        """Return the extractor's schema."""
        return self._schema

    @property
    def principals(self) -> typing.Iterator[bsfs.schema.Predicate]:
        """Return the principal predicates, i.e., relations from/to the extraction subject.

        A predicate of the schema qualifies if its domain, or its range
        (when it has one), compares `<=` to the schema's bsn:Entity node.
        NOTE(review): `<=` is presumably the schema's subtype ordering -- confirm.
        """
        entity = self.schema.node(ns.bsn.Entity)

        def touches_entity(pred: bsfs.schema.Predicate) -> bool:
            # either end of the relation may be the entity.
            if pred.domain <= entity:
                return True
            return pred.range is not None and pred.range <= entity

        # The schema lookups above and the predicates() call happen right
        # away; only the filtering is deferred until iteration.
        return (
            candidate
            for candidate in self.schema.predicates()
            if touches_entity(candidate)
            )

    @abc.abstractmethod
    def extract(
            self,
            subject: node.Node,
            content: typing.Any,
            principals: typing.Iterable[bsfs.schema.Predicate],
            ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
        """Return (node, predicate, value) triples.

        Subclasses must implement this method.

        Arguments:
            subject: The node the content belongs to.
            content: The content to extract from; presumably what the
                reader named by CONTENT_READER produced (to be confirmed
                against callers).
            principals: The principal predicates to consider; presumably
                those the caller wants triples for (to be confirmed).
        """
        # FIXME: type annotation could be more strict: value is Hashable

## EOF ##
