from __future__ import annotations

from abc import abstractmethod
from typing import Any

from sapiopylib.rest.User import SapioUser
from sapiopylib.rest.pojo.CustomReport import RawReportTerm, RawTermOperation
from sapiopylib.rest.pojo.datatype.FieldDefinition import VeloxIntegerFieldDefinition, VeloxStringFieldDefinition, \
    AbstractVeloxFieldDefinition

from sapiopycommons.callbacks.callback_util import CallbackUtil
from sapiopycommons.files.file_data_handler import FileDataHandler, FilterList
from sapiopycommons.general.aliases import UserIdentifier, AliasUtil
from sapiopycommons.general.custom_report_util import CustomReportUtil
from sapiopycommons.general.exceptions import SapioUserCancelledException
from sapiopycommons.general.time_util import TimeUtil


class FileValidator:
    """
    The FileValidator is a class used to verify that a file provided by a user meets certain specs. For example, you
    may require that all cells in a column not be blank, or have a certain range of values, or that no two values in
    a column are the same. These validation rules can be added to the FileValidator along with the data for the file.
    Then, when the file is validated, the index of any rows that failed a rule will be returned along with the list of
    rules that they failed, allowing you to report the specifics of why a rule failed to the user.

    Look into using this in combination with FileDataHandler to prepare files for the FileValidator and for use in
    data record fields.
    """
    file_data: list[dict[str, Any]]
    rules: list[ValidationRule]

    # Maximum lengths of the "Value" and "Reason" columns in the violation report. The same numbers are used both
    # for the field definitions and for truncating the displayed text, so the two can never disagree.
    _MAX_VALUE_LENGTH: int = 500
    _MAX_REASON_LENGTH: int = 2000

    def __init__(self, file_data: list[dict[str, Any]]):
        """
        :param file_data: A list of dictionaries. Every dictionary in the list is expected to have the same keys.
            FileUtil.tokenize_csv and tokenize_xlsx can be used to convert a file into such a list.
            CustomReportUtil can also generate lists of dictionaries that match this criteria.
        """
        self.file_data = file_data
        self.rules = []

    def add_rule(self, rule: ValidationRule) -> None:
        """
        Add a new validation rule to this validator. When the validate_file function is called, each added rule will be
        run on the file. Rules are evaluated in the same order that they are added to the validator.

        Custom validation rules can be created by defining a class that extends RowRule or ColumnRule and implements
        the validation method.

        :param rule: A validation rule to be run when the file is validated.
        """
        self.rules.append(rule)

    def validate_file(self) -> dict[int, list[ValidationRule]]:
        """
        Validate the file, returning any rule failures that are encountered.

        :return: A dictionary mapping row indices to a list of the rules that they failed. This can then be used to
            report errors back to the user who uploaded the file by checking the class type of the rules. Rows that
            failed no rules have no entry in the dictionary.
        """
        failed_rows: dict[int, list[ValidationRule]] = {}

        # Check each rule for this validator, in the order that the rules were added.
        for rule in self.rules:
            # If this is a row rule, then the rule only runs on a per-row basis. Iterate over every row in the
            # file and use the rule to validate them.
            if isinstance(rule, RowRule):
                for index, row in enumerate(self.file_data):
                    # Row rules don't filter for themselves, so apply the rule's whitelist/blacklist here.
                    if FileDataHandler.skip_row(index, row, whitelist=rule.whitelist, blacklist=rule.blacklist):
                        continue
                    # These rules return a boolean for whether the rule passed or not.
                    if not rule.validate(row):
                        failed_rows.setdefault(index, []).append(rule)
            # If this is a column rule, then the rule runs down an entire column in the file. Pass the entire file data
            # list to the rule for validation.
            elif isinstance(rule, ColumnRule):
                # These rules return a list of row indices that caused the rule to fail for the entire column.
                for index in rule.validate(self.file_data):
                    failed_rows.setdefault(index, []).append(rule)

        return failed_rows

    def build_violation_report(self, context: UserIdentifier,
                               rule_violations: dict[int, list[ValidationRule]]) -> None:
        """
        Display a simple report of any rule violations in the file to the user as a table dialog. Nothing is displayed
        if there are no violations.

        :param context: The current webhook context or a user object to send requests from.
        :param rule_violations: A dict of rule violations generated by a call to validate_file.
        """
        if not rule_violations:
            return

        file_handler = FileDataHandler(self.file_data)
        columns: list[AbstractVeloxFieldDefinition] = [
            VeloxIntegerFieldDefinition("Errors", "RowNum", "Row Number"),
            VeloxStringFieldDefinition("Errors", "Header", "Header"),
            VeloxStringFieldDefinition("Errors", "Value", "Value", max_length=self._MAX_VALUE_LENGTH),
            VeloxStringFieldDefinition("Errors", "Reason", "Reason", max_length=self._MAX_REASON_LENGTH,
                                       default_table_column_width=500)
        ]
        rows: list[dict[str, Any]] = []
        for index, violations in rule_violations.items():
            file_row: dict[str, Any] = file_handler.get_row(index)
            for violation in violations:
                if isinstance(violation, ColumnRule):
                    # Column rules always act upon a specific header, so list the header and the value for this row
                    # under that header alongside the reason. The value is cut down to the declared max length of
                    # the "Value" column, the same way that the reason is for the "Reason" column.
                    header: str = violation.header
                    value: str = str(file_row.get(header))[:self._MAX_VALUE_LENGTH]
                elif isinstance(violation, RowRule):
                    # We can't know what header(s) a row rule was looking at, so just leave header and value
                    # blank and trust that the reason describes enough what the issue was.
                    header = "N/A"
                    value = "N/A"
                else:
                    # Rules that are neither column nor row rules have no report representation.
                    continue
                rows.append({
                    # NOTE(review): the +2 presumably converts the 0-based data index into a 1-based file line
                    # number that also accounts for a header line - confirm against how the file was tokenized.
                    "RowNum": index + 2,
                    "Header": header,
                    "Value": value,
                    "Reason": violation.reason[:self._MAX_REASON_LENGTH]
                })

        callback = CallbackUtil(context)
        callback.table_dialog("Errors", "The following rule violations were encountered in the provided file.",
                              columns, rows)

    def validate_and_report_errors(self, context: UserIdentifier) -> None:
        """
        Validate the file. If any rule violations are found, display a simple report of any rule violations in the file
        to the user as a table dialog and throw a SapioUserCancelled exception after the user acknowledges the dialog
        to end the webhook interaction.

        Shorthand for calling validate_file() and then build_violation_report() if there are any errors.

        :param context: The current webhook context or a user object to send requests from.
        :raises SapioUserCancelledException: If the file violated at least one rule.
        """
        violations = self.validate_file()
        if violations:
            self.build_violation_report(context, violations)
            raise SapioUserCancelledException()


class ValidationRule:
    """
    Common parent of every validation rule. A rule carries a human-readable reason describing why a file violated
    it, plus an optional whitelist and blacklist that determine which rows the rule is skipped for.

    Do not extend this class directly. Custom rules should extend RowRule or ColumnRule instead.
    """
    reason: str
    whitelist: FilterList
    blacklist: FilterList

    def __init__(self, reason: str, whitelist: FilterList, blacklist: FilterList):
        """
        :param reason: A string explaining the reason why a violation occurred for this rule.
        :param whitelist: Rows that don't match the whitelist are skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: Rows that match the blacklist are skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        self.reason, self.whitelist, self.blacklist = reason, whitelist, blacklist


class RowRule(ValidationRule):
    """
    Parent class of every row rule. A row rule is not bound to a single header; it is handed one row at a time and
    may look at any number of cells in that row. Subclass this to write a custom row-based validation rule.
    """
    def __init__(self, reason: str, whitelist: FilterList = None, blacklist: FilterList = None):
        """
        :param reason: A string explaining the reason why a violation occurred for this rule.
        :param whitelist: Rows that don't match the whitelist are skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: Rows that match the blacklist are skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        super().__init__(reason=reason, whitelist=whitelist, blacklist=blacklist)

    @abstractmethod
    def validate(self, row: dict[str, Any]) -> bool:
        """
        Check a single row against this rule.

        :param row: A row from the file.
        :return: True if the row passed the rule, False if it violated the rule.
        """
        ...


class ColumnRule(ValidationRule):
    """
    Parent class of every column rule. A column rule is bound to one specific header and is handed every row of the
    file at once so that it can inspect each cell under that header. Subclass this to write a custom column-based
    validation rule.
    """
    header: str

    def __init__(self, header: str, reason: str, whitelist: FilterList = None, blacklist: FilterList = None):
        """
        :param header: The header that this rule acts upon.
        :param reason: A string explaining the reason why a violation occurred for this rule.
        :param whitelist: Rows that don't match the whitelist are skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: Rows that match the blacklist are skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        super().__init__(reason=reason, whitelist=whitelist, blacklist=blacklist)
        self.header = header

    @abstractmethod
    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        Check the column under this rule's header against this rule.

        :param rows: Every row from the file.
        :return: The indices of every row that violated the rule.
        """
        ...


class NoBlanksRule(ColumnRule):
    """
    A column rule that is violated by any blank cell under the header.
    """
    def __init__(self, header: str, *, reason: str | None = None, whitelist: FilterList = None,
                 blacklist: FilterList = None):
        """
        :param header: The header that this rule acts upon.
        :param reason: A string explaining the reason why a violation occurred for this rule. When omitted, a
            generic default reason is used; provide your own if you want a more detailed explanation.
        :param whitelist: Rows that don't match the whitelist are skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: Rows that match the blacklist are skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        default_reason = "This value is not allowed to be blank."
        super().__init__(header, default_reason if reason is None else reason, whitelist, blacklist)

    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        :param rows: Every row from the file.
        :return: The row indices that the file handler reports as empty cells under this rule's header.
        """
        return FileDataHandler(rows).empty_cells(self.header, whitelist=self.whitelist, blacklist=self.blacklist)


class NoDuplicatesRule(ColumnRule):
    """
    A column rule that is violated when the same value appears more than once under the header.
    """
    def __init__(self, header: str, *, reason: str | None = None, whitelist: FilterList = None,
                 blacklist: FilterList = None):
        """
        :param header: The header that this rule acts upon.
        :param reason: A string explaining the reason why a violation occurred for this rule. When omitted, a
            generic default reason is used; provide your own if you want a more detailed explanation.
        :param whitelist: Rows that don't match the whitelist are skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: Rows that match the blacklist are skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        default_reason = "This value is a duplicate of another value in the same column."
        super().__init__(header, default_reason if reason is None else reason, whitelist, blacklist)

    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        :param rows: Every row from the file.
        :return: The row indices listed by the file handler for every duplicated value, flattened into one list.
        """
        by_value: dict[Any, list[int]] = FileDataHandler(rows).get_duplicates(
            self.header, whitelist=self.whitelist, blacklist=self.blacklist)
        # Each dictionary value is a collection of row indices; the rule only needs them as a single flat list.
        return [row_index for row_indices in by_value.values() for row_index in row_indices]


class AllowedValuesRule(ColumnRule):
    """
    A column rule that is violated by any cell whose value is not one of a given list of values.
    """
    values: list[Any]

    def __init__(self, header: str, values: list[Any], *, reason: str | None = None, whitelist: FilterList = None,
                 blacklist: FilterList = None):
        """
        :param header: The header that this rule acts upon.
        :param values: The values that cells under the header are allowed to have.
        :param reason: A string explaining the reason why a violation occurred for this rule. When omitted, a
            default reason listing the allowed values is used; provide your own if you want a more detailed
            explanation.
        :param whitelist: Rows that don't match the whitelist are skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: Rows that match the blacklist are skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        if reason is None:
            allowed = ', '.join(map(str, values))
            reason = f"This value is not one of the allowed values: {allowed}"
        super().__init__(header, reason, whitelist, blacklist)
        self.values = values

    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        :param rows: Every row from the file.
        :return: The row indices that the file handler reports as not being in the allowed values list.
        """
        return FileDataHandler(rows).get_not_in_list(self.header, self.values,
                                                     whitelist=self.whitelist, blacklist=self.blacklist)


class MatchesPatternRule(ColumnRule):
    """
    A column rule that is violated by any cell whose value does not match a regex pattern.
    """
    pattern: str

    def __init__(self, header: str, pattern: str, *, reason: str | None = None, whitelist: FilterList = None,
                 blacklist: FilterList = None):
        """
        :param header: The header that this rule acts upon.
        :param pattern: A regex pattern.
        :param reason: A string explaining the reason why a violation occurred for this rule. When omitted, a
            default reason quoting the pattern is used; provide your own if you want a more detailed explanation.
        :param whitelist: Rows that don't match the whitelist are skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: Rows that match the blacklist are skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        default_reason = f"This value does not match the expected format: {pattern}."
        super().__init__(header, default_reason if reason is None else reason, whitelist, blacklist)
        self.pattern = pattern

    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        :param rows: Every row from the file.
        :return: The row indices that the file handler reports as mismatching the pattern.
        """
        return FileDataHandler(rows).get_mismatches(self.header, self.pattern,
                                                    whitelist=self.whitelist, blacklist=self.blacklist)


class MatchesDateFormatRule(ColumnRule):
    """
    A column rule that is violated by any cell whose value does not match a given date format.
    """
    time_format: str

    def __init__(self, header: str, time_format: str, *, reason: str | None = None, whitelist: FilterList = None,
                 blacklist: FilterList = None):
        """
        :param header: The header that this rule acts upon.
        :param time_format: A date/time format. See TimeUtil for more specifics.
        :param reason: A string explaining the reason why a violation occurred for this rule. When omitted, a
            default reason quoting the format is used; provide your own if you want a more detailed explanation.
        :param whitelist: Rows that don't match the whitelist are skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: Rows that match the blacklist are skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        default_reason = f"This value does not match the expected format: {time_format}."
        super().__init__(header, default_reason if reason is None else reason, whitelist, blacklist)
        self.time_format = time_format

    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        :param rows: Every row from the file.
        :return: The indices of every row whose cell under the header does not match the date format.
        """
        # Selects the rows that violate the rule: those where the cell fails the format check.
        def format_mismatch(_index: int, row: dict[str, Any]) -> bool:
            matches = TimeUtil.str_matches_format(row.get(self.header), self.time_format)
            return not matches

        return FileDataHandler(rows).get_by_function(format_mismatch,
                                                     whitelist=self.whitelist, blacklist=self.blacklist)


class ValueIsCastableRule(ColumnRule):
    """
    A column rule that is violated by any cell whose value cannot be cast to a given type. The main use is checking
    whether string values can be cast to ints or floats, although any type may be provided. The check is simply
    whether calling the type on the value raises an exception, so a type that never raises for a bad value results in
    a rule that never fails.
    """
    cast_type: type

    def __init__(self, header: str, cast_type: type, *, reason: str | None = None, whitelist: FilterList = None,
                 blacklist: FilterList = None):
        """
        :param header: The header that this rule acts upon.
        :param cast_type: A type to attempt to cast the values under the header to.
        :param reason: A string explaining the reason why a violation occurred for this rule. When omitted, a
            default reason naming the type is used; provide your own if you want a more detailed explanation.
        :param whitelist: Rows that don't match the whitelist are skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: Rows that match the blacklist are skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        default_reason = f"This value cannot be casted to the type {cast_type}"
        super().__init__(header, default_reason if reason is None else reason, whitelist, blacklist)
        self.cast_type = cast_type

    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        :param rows: Every row from the file.
        :return: The indices of every row whose cell under the header cannot be cast to the type.
        """
        # Selects the rows that violate the rule: those where the cast attempt raises.
        def cast_fails(_index: int, row: dict[str, Any]) -> bool:
            cell = row.get(self.header)
            try:
                self.cast_type(cell)
            except Exception:
                # Any exception at all counts as "not castable"; which one is raised depends on the type.
                return True
            return False

        return FileDataHandler(rows).get_by_function(cast_fails, whitelist=self.whitelist, blacklist=self.blacklist)


class ValueInsideRangeRule(ColumnRule):
    """
    Requires that every cell in a column has a value that is inside the given range (inclusive).
    """
    min_val: float | int
    max_val: float | int

    def __init__(self, header: str, min_val: float | int, max_val: float | int,
                 *, reason: str | None = None, whitelist: FilterList = None, blacklist: FilterList = None):
        """
        :param header: The header that this rule acts upon.
        :param min_val: The minimum allowed value of the cell.
        :param max_val: The maximum allowed value of the cell.
        :param reason: A string explaining the reason why a violation occurred for this rule. A default reason is
            provided if none is given by this initialization, but you may want to provide more detailed reasoning than
            the default.
        :param whitelist: If a row doesn't match the whitelist, it will be skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: If a row matches the blacklist, it will be skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        if reason is None:
            reason = f"This value is not within the range [{min_val}, {max_val}]."
        super().__init__(header, reason, whitelist, blacklist)
        self.min_val = min_val
        self.max_val = max_val

    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        :param rows: Every row from the file.
        :return: The indices of every row that violated the rule, i.e. whose value is NOT inside the range.
        """
        handler = FileDataHandler(rows)
        # A column rule returns the rows that FAILED. This rule demands values inside the range, so the failures are
        # the rows outside of it. Asking for the inside rows would flag exactly the rows that passed.
        # NOTE(review): relies on get_outside_range returning the indices of out-of-range rows - confirm.
        return handler.get_outside_range(self.header, self.min_val, self.max_val,
                                         whitelist=self.whitelist, blacklist=self.blacklist)


class ValueOutsideRangeRule(ColumnRule):
    """
    Requires that every cell in a column has a value that is outside the given range (exclusive).
    """
    min_val: float | int
    max_val: float | int

    def __init__(self, header: str, min_val: float | int, max_val: float | int,
                 *, reason: str | None = None, whitelist: FilterList = None, blacklist: FilterList = None):
        """
        :param header: The header that this rule acts upon.
        :param min_val: The value that the cell may be below.
        :param max_val: The value that the cell may be above.
        :param reason: A string explaining the reason why a violation occurred for this rule. A default reason is
            provided if none is given by this initialization, but you may want to provide more detailed reasoning than
            the default.
        :param whitelist: If a row doesn't match the whitelist, it will be skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: If a row matches the blacklist, it will be skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        if reason is None:
            reason = f"This value is not outside of the range [{min_val}, {max_val}]"
        super().__init__(header, reason, whitelist, blacklist)
        self.min_val = min_val
        self.max_val = max_val

    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        :param rows: Every row from the file.
        :return: The indices of every row that violated the rule, i.e. whose value is NOT outside the range.
        """
        handler = FileDataHandler(rows)
        # A column rule returns the rows that FAILED. This rule demands values outside the range, so the failures
        # are the rows inside of it. Asking for the outside rows would flag exactly the rows that passed.
        # NOTE(review): relies on get_inside_range returning the indices of in-range rows - confirm.
        return handler.get_inside_range(self.header, self.min_val, self.max_val,
                                        whitelist=self.whitelist, blacklist=self.blacklist)


class ContainsSubstringFromCellRule(RowRule):
    """
    Requires that the cell in the first column contains the value in the cell of the second column.
    """
    first: str
    second: str

    def __init__(self, first: str, second: str, *, reason: str | None = None, whitelist: FilterList = None,
                 blacklist: FilterList = None):
        """
        :param first: The header to check the contents of.
        :param second: The header to use the values of to check the above header.
        :param reason: A string explaining the reason why a violation occurred for this rule. A default reason is
            provided if none is given by this initialization, but you may want to provide more detailed reasoning than
            the default.
        :param whitelist: If a row doesn't match the whitelist, it will be skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: If a row matches the blacklist, it will be skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        if reason is None:
            reason = f"The value in column {first} does not contain the value in the column {second}."
        super().__init__(reason, whitelist, blacklist)
        self.first = first
        self.second = second

    def validate(self, row: dict[str, Any]) -> bool:
        """
        :param row: A row from the file.
        :return: True if the value under the first header contains the value under the second header. False if it
            doesn't, or if the two cells can't be compared at all (e.g. one of them is missing or None).
        """
        container: Any = row.get(self.first)
        fragment: Any = row.get(self.second)
        try:
            return fragment in container
        except TypeError:
            # Raised when the containment check is impossible, e.g. the first cell is None or a number, or the
            # second cell is not a string while the first is. Such a row can't satisfy the rule, so report it as a
            # violation instead of crashing the validation of the whole file.
            return False


class UniqueSystemValueRule(ColumnRule):
    """
    Requires that every cell in the column has a value that is not already in use in the system for a given data type
    and field name.
    """
    user: SapioUser
    data_type_name: str
    data_field_name: str

    def __init__(self, context: UserIdentifier, header: str, data_type_name: str,
                 data_field_name: str, *, reason: str | None = None):
        """
        :param context: The current webhook context or a user object to send requests from.
        :param header: The header that this rule acts upon.
        :param data_type_name: The data type name to search on.
        :param data_field_name: The data field name to search on. This is expected to be a string field.
        :param reason: A string explaining the reason why a violation occurred for this rule. A default reason is
            provided if none is given by this initialization, but you may want to provide more detailed reasoning than
            the default.
        """
        self.user = AliasUtil.to_sapio_user(context)
        self.data_type_name = data_type_name
        self.data_field_name = data_field_name
        if reason is None:
            reason = "This value already exists in the system."
        super().__init__(header, reason)

    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        Query the system for records whose field matches any value under the header, then flag every row whose
        value was found.

        :param rows: Every row from the file.
        :return: The indices of every row whose value under the header already exists in the system.
        """
        file_handler = FileDataHandler(rows)
        values: list[str] = file_handler.get_values_list(self.header)
        # A None (missing) cell has nothing to search for and would make str.join raise a TypeError, so leave it
        # out of the search term. Such rows can never match an existing value and are therefore never flagged.
        search_values: list[str] = [value for value in values if value is not None]

        # Run a quick report for all records of this type that match these field values.
        # NOTE(review): the "{a,b,c}" form is presumably the report syntax for "equal to any of" - confirm.
        term = RawReportTerm(self.data_type_name, self.data_field_name, RawTermOperation.EQUAL_TO_OPERATOR,
                             "{" + ",".join(search_values) + "}")
        results: list[dict[str, Any]] = CustomReportUtil.run_quick_report(self.user, term)
        existing_values: list[Any] = [x.get(self.data_field_name) for x in results]
        return file_handler.get_in_list(self.header, existing_values)


class ExistingSystemValueRule(ColumnRule):
    """
    Requires that every cell in the column has a value that is already in use in the system for a given data type
    and field name.
    """
    user: SapioUser
    data_type_name: str
    data_field_name: str

    def __init__(self, context: UserIdentifier, header: str, data_type_name: str,
                 data_field_name: str, *, reason: str | None = None):
        """
        :param context: The current webhook context or a user object to send requests from.
        :param header: The header that this rule acts upon.
        :param data_type_name: The data type name to search on.
        :param data_field_name: The data field name to search on. This is expected to be a string field.
        :param reason: A string explaining the reason why a violation occurred for this rule. A default reason is
            provided if none is given by this initialization, but you may want to provide more detailed reasoning than
            the default.
        """
        self.user = AliasUtil.to_sapio_user(context)
        self.data_type_name = data_type_name
        self.data_field_name = data_field_name
        if reason is None:
            reason = "This value doesn't exist in the system."
        super().__init__(header, reason)

    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        Query the system for records whose field matches any value under the header, then flag every row whose
        value was not found.

        :param rows: Every row from the file.
        :return: The indices of every row whose value under the header does not exist in the system.
        """
        file_handler = FileDataHandler(rows)
        values: list[str] = file_handler.get_values_list(self.header)
        # A None (missing) cell has nothing to search for and would make str.join raise a TypeError, so leave it
        # out of the search term. Such rows can never match an existing value and so are still flagged below.
        search_values: list[str] = [value for value in values if value is not None]

        # Run a quick report for all records of this type that match these field values.
        # NOTE(review): the "{a,b,c}" form is presumably the report syntax for "equal to any of" - confirm.
        term = RawReportTerm(self.data_type_name, self.data_field_name, RawTermOperation.EQUAL_TO_OPERATOR,
                             "{" + ",".join(search_values) + "}")
        results: list[dict[str, Any]] = CustomReportUtil.run_quick_report(self.user, term)
        existing_values: list[Any] = [x.get(self.data_field_name) for x in results]
        return file_handler.get_not_in_list(self.header, existing_values)
