
# standard imports
import argparse
import os
import typing

# bsie imports
from bsie.lib import BSIE, DefaultNamingPolicy
from bsie.utils import bsfs, errors, node as node_

# inner-module imports
from . import _loader

# exports
__all__: typing.Sequence[str] = (
    'main',
    )


## code ##

def main(argv):
    """Index files or directories into BSFS."""
    # NOTE: The docstring above doubles as the command-line description
    # (it is passed to argparse via ``main.__doc__``), hence it is kept to a
    # single line and the details are documented here instead.
    #
    # Arguments:
    #   argv: List of command-line arguments, without the program name.
    #
    # Returns:
    #   None if --print was given (triples are printed instead of stored),
    #   otherwise the store that the triples were written to.
    #
    # Input paths that do not exist, and directories passed without
    # -r/--recursive, are reported on stderr and skipped.

    # local import: only needed to report skipped paths on stderr
    import sys

    parser = argparse.ArgumentParser(description=main.__doc__, prog='index')
    parser.add_argument('--config', type=str,
        default=os.path.join(os.path.dirname(__file__), _loader.DEFAULT_CONFIG_FILE),
        help='Path to the config file.')
    parser.add_argument('--host', type=bsfs.URI, default=bsfs.URI('http://example.com'),
        help='')
    parser.add_argument('--user', type=str, default='me',
        help='')
    parser.add_argument('--collect', action='append', default=[],
        help='')
    parser.add_argument('--discard', action='append', default=[],
        help='')
    parser.add_argument('-r', '--recursive', action='store_true', default=False,
        help='')
    parser.add_argument('--follow', action='store_true', default=False,
        help='')
    parser.add_argument('--print', action='store_true', default=False,
        help='')
    parser.add_argument('input_file', nargs=argparse.REMAINDER,
        help='')
    args = parser.parse_args(argv)

    # build pipeline
    pipeline = _loader.load_pipeline(args.config)
    # build the naming policy
    naming_policy = DefaultNamingPolicy(
        host=args.host,
        user=args.user,
        )
    # build BSIE frontend
    bsie = BSIE(pipeline, naming_policy, args.collect, args.discard)

    def notify_skipped(path, reason):
        """Tell the user on stderr that an input path is not being indexed."""
        print(f'{parser.prog}: skipping {path}: {reason}', file=sys.stderr)

    def walk(handle):
        """Walk through given input files.

        Calls `handle(node, pred, value)` for every triple that
        `bsie.from_file` yields for the input files. Directories are
        only descended into if --recursive was given; symlinked
        directories are only followed if --follow was given.
        """
        # FIXME: collect all triples by node, set all predicates at once
        # FIXME: simplify code (below but maybe also above)
        # FIXME: How to handle dependencies between data?
        #        E.g. do I still want to link to a tag despite not being permitted to set its label?

        # index input paths
        for path in args.input_file:
            if not os.path.exists(path):
                notify_skipped(path, 'no such file or directory')
            elif os.path.isdir(path):
                if not args.recursive:
                    # A directory without --recursive is ordinary user input,
                    # not a logic error: report it and carry on.
                    notify_skipped(path, 'is a directory (use -r/--recursive to index it)')
                    continue
                for dirpath, _, filenames in os.walk(path, topdown=True, followlinks=args.follow):
                    for filename in filenames:
                        for node, pred, value in bsie.from_file(os.path.join(dirpath, filename)):
                            handle(node, pred, value)
            elif os.path.isfile(path):
                for node, pred, value in bsie.from_file(path):
                    handle(node, pred, value)
            else:
                # The path exists but is neither a directory nor a regular file.
                raise errors.UnreachableError()


    if args.print:
        # dry run: print the triples instead of storing them
        walk(print)
        return None

    # initialize bsfs
    # NOTE: With persistent storages, the schema migration will be a separate operation.
    # Here, we'd simply examine the schema and potentially discard more predicates.
    store = bsfs.Open(bsfs.init_sparql_store(args.user))
    store.migrate(bsie.schema)
    # process files
    def handle(node, pred, value):
        """Write one triple to the store."""
        # node-valued objects are replaced by the store's node of the same type and uri
        if isinstance(value, node_.Node):
            value = store.node(value.node_type, value.uri)
        store.node(node.node_type, node.uri).set(pred.uri, value)
    walk(handle)
    # return store
    return store


## main ##

if __name__ == '__main__':
    # Executed as a script: forward the command-line arguments
    # (without the program name) to the indexer.
    import sys
    cli_arguments = sys.argv[1:]
    main(cli_arguments)

## EOF ##
