Module: entry_key

Expand source code

# Copyright (C) 2023-present The Project Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re
from dataclasses import dataclass
from typing import Type
from cl.runtime.log.exceptions.user_error import UserError
from cl.runtime.primitive.case_util import CaseUtil
from cl.runtime.primitive.string_util import StringUtil
from cl.runtime.records.dataclasses_extensions import missing
from cl.runtime.records.key_mixin import KeyMixin
from cl.runtime.records.protocols import is_key

_MAX_TITLE_LEN = 1000
"""Maximum length of the title."""

_DISALLOWED_TITLE_SUBSTRINGS = {
    ":": "Colon",
    "(": "Left parenthesis",
    ")": "Right parenthesis",
    "n": "End of line",
    "r": "Carriage return",
}
"""These substrings are not allowed in title."""

_MD5_HEX_RE = re.compile(r"^[0-9a-f]+$")
"""Regex for MD5 hex."""


@dataclass(slots=True, kw_only=True)
class EntryKey(KeyMixin):
    """Contains title, body and supporting data of user entry along with the entry processing result."""

    entry_id: str = missing()
    """Based on record type, title and MD5 hash of body and data if present, EntryUtil.create_id is used to generate."""

    def init(self) -> None:
        # Check only if inside a key, will be set automatically if inside a record
        if is_key(self):
            self.check_entry_id(self.entry_id)

    @classmethod
    def get_key_type(cls) -> Type:
        return EntryKey

    @classmethod
    def get_entry_id(
        cls,
        record_type: str,
        title: str,
        body: str | None = None,
        data: str | None = None,
    ) -> str:
        """Create the unique identifier from parameters."""

        # Initial checks for the title
        if StringUtil.is_empty(title):
            raise UserError(f"Empty 'title' field in {record_type}.")
        if len(title) > _MAX_TITLE_LEN:
            raise UserError(
                f"The length {len(title)} of the 'title' field in {record_type} exceeds {_MAX_TITLE_LEN}, "
                f"use 'Entry.body' field for the excess text."
            )
        title_substrings = [name for sub, name in _DISALLOWED_TITLE_SUBSTRINGS.items() if sub in title]
        if title_substrings:
            title_substrings_str = "n".join(title_substrings)
            raise UserError(
                f"Field 'title' contains the following disallowed substrings:n{title_substrings_str}n. "
                f"Field text:n{title}"
            )

        # Combine ClassName with title
        type_and_title = f"{record_type}: {title}"

        if not StringUtil.is_empty(body) or not StringUtil.is_empty(data):
            # Append MD5 hash in hexadecimal format of the body and data if at least one is present
            md5_hash = StringUtil.md5_hex(f"{body}.{data}")
            entry_id = f"{type_and_title} (MD5: {md5_hash})"
        else:
            # Otherwise use type and title only
            entry_id = type_and_title
        return entry_id

    @classmethod
    def check_entry_id(cls, entry_id: str) -> None:
        """Check that the unique identifier is compliant with the expected format."""
        is_valid = True
        type_and_title = None
        # Validate MD5 suffix if present
        left_parenthesis_tokens = entry_id.split("(")
        if len(left_parenthesis_tokens) == 2:
            # Includes type, title and MD5 cache
            type_and_title = left_parenthesis_tokens[0]
            md5_suffix = left_parenthesis_tokens[1]
            is_valid = md5_suffix.startswith("MD5: ") and md5_suffix.endswith(")")
            md5_hex = md5_suffix[5:-1]
            is_valid = is_valid and len(md5_hex) == 32 and bool(_MD5_HEX_RE.match(md5_hex))
        elif len(left_parenthesis_tokens) == 1:
            # Includes only type and title
            type_and_title = entry_id
        else:
            is_valid = False

        # Validate type and title
        if is_valid:
            colon_tokens = type_and_title.split(": ")
            if len(colon_tokens) == 2:
                is_valid = CaseUtil.is_pascal_case(colon_tokens[0])
            else:
                is_valid = False

        # Error message if does not match format
        if not is_valid:
            raise UserError(
                f"EntryId format must be either '{{RecordType}}: {{Title}}' "
                f"or '{{RecordType}}: {{Title}} (MD5: {{lowercase hexadecimal}})'.n"
                f"EntryId: '{entry_id}'"
            )

Classes

class EntryKey (*, entry_id: str = None)

Contains title, body and supporting data of user entry along with the entry processing result.

Expand source code

@dataclass(slots=True, kw_only=True)
class EntryKey(KeyMixin):
    """Contains title, body and supporting data of user entry along with the entry processing result."""

    entry_id: str = missing()
    """Based on record type, title and MD5 hash of body and data if present, EntryUtil.create_id is used to generate."""

    def init(self) -> None:
        # Check only if inside a key, will be set automatically if inside a record
        if is_key(self):
            self.check_entry_id(self.entry_id)

    @classmethod
    def get_key_type(cls) -> Type:
        return EntryKey

    @classmethod
    def get_entry_id(
        cls,
        record_type: str,
        title: str,
        body: str | None = None,
        data: str | None = None,
    ) -> str:
        """Create the unique identifier from parameters."""

        # Initial checks for the title
        if StringUtil.is_empty(title):
            raise UserError(f"Empty 'title' field in {record_type}.")
        if len(title) > _MAX_TITLE_LEN:
            raise UserError(
                f"The length {len(title)} of the 'title' field in {record_type} exceeds {_MAX_TITLE_LEN}, "
                f"use 'Entry.body' field for the excess text."
            )
        title_substrings = [name for sub, name in _DISALLOWED_TITLE_SUBSTRINGS.items() if sub in title]
        if title_substrings:
            title_substrings_str = "n".join(title_substrings)
            raise UserError(
                f"Field 'title' contains the following disallowed substrings:n{title_substrings_str}n. "
                f"Field text:n{title}"
            )

        # Combine ClassName with title
        type_and_title = f"{record_type}: {title}"

        if not StringUtil.is_empty(body) or not StringUtil.is_empty(data):
            # Append MD5 hash in hexadecimal format of the body and data if at least one is present
            md5_hash = StringUtil.md5_hex(f"{body}.{data}")
            entry_id = f"{type_and_title} (MD5: {md5_hash})"
        else:
            # Otherwise use type and title only
            entry_id = type_and_title
        return entry_id

    @classmethod
    def check_entry_id(cls, entry_id: str) -> None:
        """Check that the unique identifier is compliant with the expected format."""
        is_valid = True
        type_and_title = None
        # Validate MD5 suffix if present
        left_parenthesis_tokens = entry_id.split("(")
        if len(left_parenthesis_tokens) == 2:
            # Includes type, title and MD5 cache
            type_and_title = left_parenthesis_tokens[0]
            md5_suffix = left_parenthesis_tokens[1]
            is_valid = md5_suffix.startswith("MD5: ") and md5_suffix.endswith(")")
            md5_hex = md5_suffix[5:-1]
            is_valid = is_valid and len(md5_hex) == 32 and bool(_MD5_HEX_RE.match(md5_hex))
        elif len(left_parenthesis_tokens) == 1:
            # Includes only type and title
            type_and_title = entry_id
        else:
            is_valid = False

        # Validate type and title
        if is_valid:
            colon_tokens = type_and_title.split(": ")
            if len(colon_tokens) == 2:
                is_valid = CaseUtil.is_pascal_case(colon_tokens[0])
            else:
                is_valid = False

        # Error message if does not match format
        if not is_valid:
            raise UserError(
                f"EntryId format must be either '{{RecordType}}: {{Title}}' "
                f"or '{{RecordType}}: {{Title}} (MD5: {{lowercase hexadecimal}})'.n"
                f"EntryId: '{entry_id}'"
            )

Ancestors

KeyMixin
abc.ABC

Subclasses

Entry

Static methods

def check_entry_id(entry_id: str) -> None: Check that the unique identifier is compliant with the expected format.
def get_entry_id(record_type: str, title: str, body: str | None = None, data: str | None = None) -> str: Create the unique identifier from parameters.
def get_key_type() -> Type: Inherited from: KeyMixin.get_key_type

Return key type even when called from a record.

Fields

var entry_id -> str: Based on record type, title and MD5 hash of body and data if present, EntryUtil.create_id is used to generate.

Methods

def init(self) -> None