Module: regression_guard

Expand source code

# Copyright (C) 2023-present The Project Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import difflib
import os
from dataclasses import dataclass
from enum import Enum
from typing import Any
from typing import ClassVar
from typing import Dict
from typing import Literal
import yaml
from typing_extensions import Self
from cl.runtime.context.env_util import EnvUtil
from cl.runtime.records.protocols import is_key
from cl.runtime.records.protocols import is_record
from cl.runtime.schema.field_decl import primitive_types
from cl.runtime.serialization.dict_serializer import DictSerializer
from cl.runtime.serialization.string_serializer import StringSerializer

# Extensions accepted by RegressionGuard.__init__ (compared after stripping a leading dot);
# the same list is echoed in the error raised by _error_extension_not_supported.
_supported_extensions = ["txt"]
"""The list of supported output file extensions (formats)."""

# Shared module-level instance, used by RegressionGuard._format_txt for values recognized by is_key.
key_serializer = StringSerializer()
"""Serializer for keys."""

# Shared module-level instance, used by RegressionGuard._format_txt for values recognized by is_record.
data_serializer = DictSerializer()
"""Serializer for records."""


def _error_extension_not_supported(ext: str) -> Any:
    """
    Raise the standard error for an output file extension that RegressionGuard does not handle.

    Args:
        ext: The rejected extension, as it should appear in the error message

    Raises:
        RuntimeError: Always; the message lists every supported extension
    """
    # Build the comma-separated list first so the message assembly stays on one readable line
    supported_list = ", ".join(_supported_extensions)
    message = f"Extension {ext} is not supported by RegressionGuard. " f"Supported extensions: {supported_list}"
    raise RuntimeError(message)


@dataclass(slots=True, init=False)
class RegressionGuard:
    """
    Detects changes (regression) of output across multiple channels during unit testing.

    Notes:
        - Channel name is module.test_function or module.test_class.test_method
        - The output is recorded in 'channel.received.ext' located next to the unit test
        - If 'channel.expected.ext' does not exist, it is created with the same data as 'channel.received.ext'
        - Otherwise, the test fails if 'channel.expected.ext' and 'channel.received.ext' differ
        - To record a new 'channel.expected.ext' file, delete the existing one
        - File extension 'ext' is determined based on the verify method(s) called
    """

    __guard_dict: ClassVar[Dict[str, Self]] = {}  # TODO: Set using ContextVars
    """Dictionary of existing guards indexed by the combination of output_dir and ext."""

    __delegate_to: Self | None
    """Delegate all function calls to this regression guard if set."""

    __verified: bool
    """Verify method sets this flag to true, after which further writes raise an error."""

    __exception_text: str | None
    """Exception text from an earlier verification is reused instead of comparing the files again."""

    output_path: str
    """Output path including directory and channel."""

    ext: str
    """Output file extension (format), defaults to '.txt'"""

    def __init__(
        self,
        *,
        ext: str = None,
        channel: str | None = None,
        test_function_pattern: str | None = None,
    ):
        """
        Initialize the regression guard, optionally specifying channel.

        Args:
            ext: File extension (format) without the dot prefix, defaults to 'txt'
            channel: Dot-delimited string for the channel or None for no channel
            test_function_pattern: Glob pattern for function or method in stack frame, defaults to 'test_*'
        """

        # Find base path by examining call stack
        base_path = EnvUtil.get_env_dir(test_function_pattern=test_function_pattern)

        # Make channel the filename prefix with dot delimiter if specified
        if channel is not None and channel != "":
            output_path = os.path.join(base_path, f"{channel}.")
        else:
            output_path = os.path.join(base_path, "")

        if ext is not None:
            # Remove dot prefix if specified
            ext = ext.removeprefix(".")
            if ext not in _supported_extensions:
                _error_extension_not_supported(ext)
        else:
            # Use txt if not specified
            ext = "txt"

        # Check if regression guard already exists for the same combination of output_path and ext
        dict_key = f"{output_path}.{ext}"
        if (existing_dict := self.__guard_dict.get(dict_key, None)) is not None:
            # Delegate to the existing guard if found, do not initialize other fields
            self.__delegate_to = existing_dict
        else:
            # Otherwise add self to dictionary
            self.__guard_dict[dict_key] = self

            # Initialize fields
            self.__delegate_to = None
            self.__verified = False
            self.__exception_text = None
            self.output_path = output_path
            self.ext = ext

            # Delete the existing received file if exists
            if os.path.exists(received_path := self._get_file_path("received")):
                os.remove(received_path)

    def write(self, value: Any) -> None:
        """
        Record the argument for regression testing purposes.

        Args:
            value: Data to be recorded, accepted data types depend on the specified file extension
        """

        # Perform type conversion
        if isinstance(value, Exception):
            value = f"Raises {type(value).__name__} with the message:n{str(value)}"

        # Delegate to a previously created guard with the same combination of output_path and ext if exists
        if self.__delegate_to is not None:
            self.__delegate_to.write(value)
            return

        if self.__verified:
            raise RuntimeError(
                f"Regression output file {self._get_file_path('received')} is already verified "
                f"and can no longer be written to."
            )

        received_path = self._get_file_path("received")
        received_dir = os.path.dirname(received_path)
        if not os.path.exists(received_dir):
            # Create the directory if does not exist
            os.makedirs(received_dir)

        if self.ext == "txt":
            with open(received_path, "a") as file:
                file.write(self._format_txt(value))
                # Flush immediately to ensure all of the output is on disk in the event of test exception
                file.flush()
        else:
            # Should not be reached here because of a previous check in __init__
            _error_extension_not_supported(self.ext)

    @classmethod
    def verify_all(cls, *, silent: bool = False) -> None:
        """
        For each created guard, verify that 'channel.received.ext' is the same as 'channel.expected.ext'.
        Defaults to silent=False (raises exception) for calling at the end of the test.

        Notes:
            - If 'channel.expected.ext' does not exist, create from 'channel.received.ext'
            - If files are the same, delete 'channel.received.ext' and 'channel.diff.ext'
            - If files differ, write 'channel.diff.ext' and optionally raise exception

        Args:
            silent: If true, write the diff file but do not raise exception
        """

        # Call verify for all guards silently and check if all are true
        # Because 'all' is used, the comparison will not stop early
        errors_found = not all(guard.verify(silent=True) for guard in cls.__guard_dict.values())

        if errors_found and not silent:
            # Collect exception text from guards where it is present
            exc_text_blocks = [
                exception_text
                for guard in cls.__guard_dict.values()
                if (exception_text := guard._get_exception_text()) is not None
            ]

            # Merge the collected exception text blocks and raise an error
            exc_text_merged = "n".join(exc_text_blocks)
            raise RuntimeError(exc_text_merged)

    def verify(self, *, silent: bool = False) -> bool:
        """
        Verify for this regression guard that 'channel.received.ext' is the same as 'channel.expected.ext'.
        Defaults to silent=True (no exception) to permit other tests to proceed.

        Notes:
            - If 'channel.expected.ext' does not exist, create from 'channel.received.ext'
            - If files are the same, delete 'channel.received.ext' and 'channel.diff.ext'
            - If files differ, write 'channel.diff.ext' and raise exception unless silent=True

        Returns:
            bool: True if verification succeeds and false otherwise

        Args:
            silent: If true, do not raise exception and only write the 'channel.diff.ext' file
        """

        # Delegate to a previously created guard with the same combination of output_path and ext if exists
        if self.__delegate_to is not None:
            return self.verify(silent=silent)

        if self.__verified:
            # Already verified
            if not silent:
                # Use the existing exception text to raise if silent=False
                raise RuntimeError(self.__exception_text)
            else:
                # Otherwise return True if exception text is None (it is set on verification failure)
                return self.__exception_text is None
        else:
            # Otherwise set 'verified' flag and continue
            self.__verified = True

        received_path = self._get_file_path("received")
        expected_path = self._get_file_path("expected")
        diff_path = self._get_file_path("diff")

        if not os.path.exists(received_path):
            raise RuntimeError(
                f"Regression guard error, cannot verify because " f"received file {received_path} does not yet exist."
            )

        if os.path.exists(expected_path):
            # Expected file exists, compare
            if self.__cmp_files(received_path, expected_path):
                # Received and expected match, delete the received file and diff file
                os.remove(received_path)
                if os.path.exists(diff_path):
                    os.remove(diff_path)

                # Return True to indicate verification has been successful
                return True
            else:
                # Receive an expected do not match, generate unified diff
                # TODO: Handle diff for binary output
                with open(received_path, "r") as received_file:
                    received_lines = received_file.readlines()
                with open(expected_path, "r") as expected_file:
                    expected_lines = expected_file.readlines()

                # Convert to list first because the returned object is a generator but
                # we will need to iterate over the lines more than once
                diff = list(
                    difflib.unified_diff(
                        expected_lines, received_lines, fromfile=expected_path, tofile=received_path, n=0
                    )
                )

                # Write the complete unified diff into to the diff file
                with open(diff_path, "w") as diff_file:
                    diff_file.write("".join(diff))

                # Truncate to max_lines and surround by begin/end lines for generate exception text
                line_len = 120
                max_lines = 5
                begin_str = "BEGIN REGRESSION TEST UNIFIED DIFF "
                end_str = "END REGRESSION TEST UNIFIED DIFF "
                begin_sep = "-" * (line_len - len(begin_str))
                end_sep = "-" * (line_len - len(end_str))
                orig_lines = len(diff)
                if orig_lines > max_lines:
                    diff = diff[:max_lines]
                    truncate_str = f"(TRUNCATED {orig_lines-max_lines} ADDITIONAL LINES) "
                    end_sep = end_sep[: -len(truncate_str)]
                else:
                    truncate_str = ""
                diff_str = "".join(diff)
                exception_text = f"n{begin_str}{begin_sep}n" + diff_str
                extra_eol = "" if exception_text.endswith("n") else "n"
                exception_text = exception_text + f"{extra_eol}{end_str}{truncate_str}{end_sep}"

                # Record into the object even if silent
                self.__exception_text = exception_text

                if not silent:
                    # Raise exception only when not silent
                    raise RuntimeError(exception_text)
                else:
                    return False
        else:
            # Expected file does not exist, copy the data from received to expected
            with open(received_path, "rb") as received_file, open(expected_path, "wb") as expected_file:
                expected_file.write(received_file.read())

            # Delete the received file and diff file
            os.remove(received_path)
            if os.path.exists(diff_path):
                os.remove(diff_path)

            # Verification is considered successful if expected file has been created
            return True

    def _format_txt(self, value: Any) -> str:
        """Format text for regression testing."""
        value_type = type(value)
        if value_type in primitive_types:
            # TODO: Use specialized conversion for primitive types
            return str(value) + "n"
        elif value_type == dict:
            return yaml.dump(value, default_flow_style=False, sort_keys=False) + "n"
        elif is_record(value_type):
            return data_serializer.serialize_data(value)
        elif is_key(value_type):
            return key_serializer.serialize_key(value)
        elif issubclass(value_type, Enum):
            return str(value)
        elif hasattr(value_type, "__iter__"):
            return "n".join(map(self._format_txt, value)) + "n"
        else:
            raise RuntimeError(
                f"Argument type {value_type} is not accepted for file extension '{self.ext}'. "
                f"Valid arguments are primitive types, dict, or their iterable."
            )

    def _get_exception_text(self) -> str | None:
        """Get exception text from this guard or the guard it delegates to."""
        if self.__delegate_to is not None:
            # Get from the guard this guard delegates to
            return self.__delegate_to._get_exception_text()
        else:
            # Get from this guard
            return self.__exception_text

    def _get_file_path(self, file_type: Literal["received", "expected", "diff"]) -> str:
        """The diff between received and expected is written to 'channel.diff.ext' located next to the unit test."""
        result = f"{self.output_path}{file_type}.{self.ext}"
        return result

    def __cmp_files(self, file_path_a: str, file_path_b: str) -> bool:
        """Compare two files ignoring line endings."""
        with open(file_path_a, "r") as file_a, open(file_path_b, "r") as file_b:
            for line_a, line_b in zip(file_a, file_b):
                # Strip line endings before comparing
                if line_a.rstrip("rn") != line_b.rstrip("rn"):
                    return False
            # Check if there are any remaining lines in either file
            if file_a.readline() or file_b.readline():
                return False
        return True

Global variables

var data_serializer: Serializer for records.
var key_serializer: Serializer for keys.

Classes

class RegressionGuard (*, ext: str = None, channel: str | None = None, test_function_pattern: str | None = None)

Detects changes (regression) of output across multiple channels during unit testing.

Notes

Channel name is module.test_function or module.test_class.test_method
The output is recorded in ‘channel.received.ext’ located next to the unit test
If ‘channel.expected.ext’ does not exist, it is created with the same data as ‘channel.received.ext’
Otherwise, the test fails if ‘channel.expected.ext’ and ‘channel.received.ext’ differ
To record a new ‘channel.expected.ext’ file, delete the existing one
File extension ‘ext’ is set by the constructor's ext argument and defaults to ‘txt’

Initialize the regression guard, optionally specifying channel.

Args

ext: File extension (format) without the dot prefix, defaults to ‘txt’
channel: Dot-delimited string for the channel or None for no channel
test_function_pattern: Glob pattern for function or method in stack frame, defaults to ‘test_*’

Expand source code

@dataclass(slots=True, init=False)
class RegressionGuard:
    """
    Detects changes (regression) of output across multiple channels during unit testing.

    Notes:
        - Channel name is module.test_function or module.test_class.test_method
        - The output is recorded in 'channel.received.ext' located next to the unit test
        - If 'channel.expected.ext' does not exist, it is created with the same data as 'channel.received.ext'
        - Otherwise, the test fails if 'channel.expected.ext' and 'channel.received.ext' differ
        - To record a new 'channel.expected.ext' file, delete the existing one
        - File extension 'ext' is determined based on the verify method(s) called
    """

    __guard_dict: ClassVar[Dict[str, Self]] = {}  # TODO: Set using ContextVars
    """Dictionary of existing guards indexed by the combination of output_dir and ext."""

    __delegate_to: Self | None
    """Delegate all function calls to this regression guard if set."""

    __verified: bool
    """Verify method sets this flag to true, after which further writes raise an error."""

    __exception_text: str | None
    """Exception text from an earlier verification is reused instead of comparing the files again."""

    output_path: str
    """Output path including directory and channel."""

    ext: str
    """Output file extension (format), defaults to '.txt'"""

    def __init__(
        self,
        *,
        ext: str = None,
        channel: str | None = None,
        test_function_pattern: str | None = None,
    ):
        """
        Initialize the regression guard, optionally specifying channel.

        Args:
            ext: File extension (format) without the dot prefix, defaults to 'txt'
            channel: Dot-delimited string for the channel or None for no channel
            test_function_pattern: Glob pattern for function or method in stack frame, defaults to 'test_*'
        """

        # Find base path by examining call stack
        base_path = EnvUtil.get_env_dir(test_function_pattern=test_function_pattern)

        # Make channel the filename prefix with dot delimiter if specified
        if channel is not None and channel != "":
            output_path = os.path.join(base_path, f"{channel}.")
        else:
            output_path = os.path.join(base_path, "")

        if ext is not None:
            # Remove dot prefix if specified
            ext = ext.removeprefix(".")
            if ext not in _supported_extensions:
                _error_extension_not_supported(ext)
        else:
            # Use txt if not specified
            ext = "txt"

        # Check if regression guard already exists for the same combination of output_path and ext
        dict_key = f"{output_path}.{ext}"
        if (existing_dict := self.__guard_dict.get(dict_key, None)) is not None:
            # Delegate to the existing guard if found, do not initialize other fields
            self.__delegate_to = existing_dict
        else:
            # Otherwise add self to dictionary
            self.__guard_dict[dict_key] = self

            # Initialize fields
            self.__delegate_to = None
            self.__verified = False
            self.__exception_text = None
            self.output_path = output_path
            self.ext = ext

            # Delete the existing received file if exists
            if os.path.exists(received_path := self._get_file_path("received")):
                os.remove(received_path)

    def write(self, value: Any) -> None:
        """
        Record the argument for regression testing purposes.

        Args:
            value: Data to be recorded, accepted data types depend on the specified file extension
        """

        # Perform type conversion
        if isinstance(value, Exception):
            value = f"Raises {type(value).__name__} with the message:n{str(value)}"

        # Delegate to a previously created guard with the same combination of output_path and ext if exists
        if self.__delegate_to is not None:
            self.__delegate_to.write(value)
            return

        if self.__verified:
            raise RuntimeError(
                f"Regression output file {self._get_file_path('received')} is already verified "
                f"and can no longer be written to."
            )

        received_path = self._get_file_path("received")
        received_dir = os.path.dirname(received_path)
        if not os.path.exists(received_dir):
            # Create the directory if does not exist
            os.makedirs(received_dir)

        if self.ext == "txt":
            with open(received_path, "a") as file:
                file.write(self._format_txt(value))
                # Flush immediately to ensure all of the output is on disk in the event of test exception
                file.flush()
        else:
            # Should not be reached here because of a previous check in __init__
            _error_extension_not_supported(self.ext)

    @classmethod
    def verify_all(cls, *, silent: bool = False) -> None:
        """
        For each created guard, verify that 'channel.received.ext' is the same as 'channel.expected.ext'.
        Defaults to silent=False (raises exception) for calling at the end of the test.

        Notes:
            - If 'channel.expected.ext' does not exist, create from 'channel.received.ext'
            - If files are the same, delete 'channel.received.ext' and 'channel.diff.ext'
            - If files differ, write 'channel.diff.ext' and optionally raise exception

        Args:
            silent: If true, write the diff file but do not raise exception
        """

        # Call verify for all guards silently and check if all are true
        # Because 'all' is used, the comparison will not stop early
        errors_found = not all(guard.verify(silent=True) for guard in cls.__guard_dict.values())

        if errors_found and not silent:
            # Collect exception text from guards where it is present
            exc_text_blocks = [
                exception_text
                for guard in cls.__guard_dict.values()
                if (exception_text := guard._get_exception_text()) is not None
            ]

            # Merge the collected exception text blocks and raise an error
            exc_text_merged = "n".join(exc_text_blocks)
            raise RuntimeError(exc_text_merged)

    def verify(self, *, silent: bool = False) -> bool:
        """
        Verify for this regression guard that 'channel.received.ext' is the same as 'channel.expected.ext'.
        Defaults to silent=True (no exception) to permit other tests to proceed.

        Notes:
            - If 'channel.expected.ext' does not exist, create from 'channel.received.ext'
            - If files are the same, delete 'channel.received.ext' and 'channel.diff.ext'
            - If files differ, write 'channel.diff.ext' and raise exception unless silent=True

        Returns:
            bool: True if verification succeeds and false otherwise

        Args:
            silent: If true, do not raise exception and only write the 'channel.diff.ext' file
        """

        # Delegate to a previously created guard with the same combination of output_path and ext if exists
        if self.__delegate_to is not None:
            return self.verify(silent=silent)

        if self.__verified:
            # Already verified
            if not silent:
                # Use the existing exception text to raise if silent=False
                raise RuntimeError(self.__exception_text)
            else:
                # Otherwise return True if exception text is None (it is set on verification failure)
                return self.__exception_text is None
        else:
            # Otherwise set 'verified' flag and continue
            self.__verified = True

        received_path = self._get_file_path("received")
        expected_path = self._get_file_path("expected")
        diff_path = self._get_file_path("diff")

        if not os.path.exists(received_path):
            raise RuntimeError(
                f"Regression guard error, cannot verify because " f"received file {received_path} does not yet exist."
            )

        if os.path.exists(expected_path):
            # Expected file exists, compare
            if self.__cmp_files(received_path, expected_path):
                # Received and expected match, delete the received file and diff file
                os.remove(received_path)
                if os.path.exists(diff_path):
                    os.remove(diff_path)

                # Return True to indicate verification has been successful
                return True
            else:
                # Receive an expected do not match, generate unified diff
                # TODO: Handle diff for binary output
                with open(received_path, "r") as received_file:
                    received_lines = received_file.readlines()
                with open(expected_path, "r") as expected_file:
                    expected_lines = expected_file.readlines()

                # Convert to list first because the returned object is a generator but
                # we will need to iterate over the lines more than once
                diff = list(
                    difflib.unified_diff(
                        expected_lines, received_lines, fromfile=expected_path, tofile=received_path, n=0
                    )
                )

                # Write the complete unified diff into to the diff file
                with open(diff_path, "w") as diff_file:
                    diff_file.write("".join(diff))

                # Truncate to max_lines and surround by begin/end lines for generate exception text
                line_len = 120
                max_lines = 5
                begin_str = "BEGIN REGRESSION TEST UNIFIED DIFF "
                end_str = "END REGRESSION TEST UNIFIED DIFF "
                begin_sep = "-" * (line_len - len(begin_str))
                end_sep = "-" * (line_len - len(end_str))
                orig_lines = len(diff)
                if orig_lines > max_lines:
                    diff = diff[:max_lines]
                    truncate_str = f"(TRUNCATED {orig_lines-max_lines} ADDITIONAL LINES) "
                    end_sep = end_sep[: -len(truncate_str)]
                else:
                    truncate_str = ""
                diff_str = "".join(diff)
                exception_text = f"n{begin_str}{begin_sep}n" + diff_str
                extra_eol = "" if exception_text.endswith("n") else "n"
                exception_text = exception_text + f"{extra_eol}{end_str}{truncate_str}{end_sep}"

                # Record into the object even if silent
                self.__exception_text = exception_text

                if not silent:
                    # Raise exception only when not silent
                    raise RuntimeError(exception_text)
                else:
                    return False
        else:
            # Expected file does not exist, copy the data from received to expected
            with open(received_path, "rb") as received_file, open(expected_path, "wb") as expected_file:
                expected_file.write(received_file.read())

            # Delete the received file and diff file
            os.remove(received_path)
            if os.path.exists(diff_path):
                os.remove(diff_path)

            # Verification is considered successful if expected file has been created
            return True

    def _format_txt(self, value: Any) -> str:
        """Format text for regression testing."""
        value_type = type(value)
        if value_type in primitive_types:
            # TODO: Use specialized conversion for primitive types
            return str(value) + "n"
        elif value_type == dict:
            return yaml.dump(value, default_flow_style=False, sort_keys=False) + "n"
        elif is_record(value_type):
            return data_serializer.serialize_data(value)
        elif is_key(value_type):
            return key_serializer.serialize_key(value)
        elif issubclass(value_type, Enum):
            return str(value)
        elif hasattr(value_type, "__iter__"):
            return "n".join(map(self._format_txt, value)) + "n"
        else:
            raise RuntimeError(
                f"Argument type {value_type} is not accepted for file extension '{self.ext}'. "
                f"Valid arguments are primitive types, dict, or their iterable."
            )

    def _get_exception_text(self) -> str | None:
        """Get exception text from this guard or the guard it delegates to."""
        if self.__delegate_to is not None:
            # Get from the guard this guard delegates to
            return self.__delegate_to._get_exception_text()
        else:
            # Get from this guard
            return self.__exception_text

    def _get_file_path(self, file_type: Literal["received", "expected", "diff"]) -> str:
        """The diff between received and expected is written to 'channel.diff.ext' located next to the unit test."""
        result = f"{self.output_path}{file_type}.{self.ext}"
        return result

    def __cmp_files(self, file_path_a: str, file_path_b: str) -> bool:
        """Compare two files ignoring line endings."""
        with open(file_path_a, "r") as file_a, open(file_path_b, "r") as file_b:
            for line_a, line_b in zip(file_a, file_b):
                # Strip line endings before comparing
                if line_a.rstrip("rn") != line_b.rstrip("rn"):
                    return False
            # Check if there are any remaining lines in either file
            if file_a.readline() or file_b.readline():
                return False
        return True

Static methods

def verify_all(*, silent: bool = False) -> None

For each created guard, verify that ‘channel.received.ext’ is the same as ‘channel.expected.ext’. Defaults to silent=False (raises exception) for calling at the end of the test.

Notes

If ‘channel.expected.ext’ does not exist, create from ‘channel.received.ext’
If files are the same, delete ‘channel.received.ext’ and ‘channel.diff.ext’
If files differ, write ‘channel.diff.ext’ and optionally raise exception

Args

silent: If true, write the diff file but do not raise exception

Fields

var ext -> str: Output file extension (format) without the dot prefix, defaults to ‘txt’
var output_path -> str: Output path including directory and channel.

Methods

def verify(self, *, silent: bool = False) -> bool

Verify for this regression guard that ‘channel.received.ext’ is the same as ‘channel.expected.ext’. Defaults to silent=False (raises an exception when the files differ); pass silent=True to suppress the exception and permit other tests to proceed.

Notes

If ‘channel.expected.ext’ does not exist, create from ‘channel.received.ext’
If files are the same, delete ‘channel.received.ext’ and ‘channel.diff.ext’
If files differ, write ‘channel.diff.ext’ and raise exception unless silent=True

Returns

bool: True if verification succeeds and false otherwise

Args

silent: If true, do not raise exception and only write the ‘channel.diff.ext’ file

def write(self, value: Any) -> None

Record the argument for regression testing purposes.

Args

value: Data to be recorded, accepted data types depend on the specified file extension