# Source code for utils.credential_redactor

# SPDX-FileCopyrightText: 2026 Julen Gamboa <j.a.r.gamboa@gmail.com>
# SPDX-License-Identifier: AGPL-3.0-or-later

"""
Script
------
credential_redactor.py

Path
----
python/hillstar/utils/credential_redactor.py

Purpose
-------
Detect and redact sensitive credentials (API keys, tokens, infrastructure identifiers, PII)
from strings, logs, and error messages. Prevents accidental data leakage in output.

Implements comprehensive credential detection covering: API keys, OAuth tokens, AWS credentials,
infrastructure identifiers, and PII based on industry standard patterns.

Inputs
------
String containing potential credentials

Outputs
-------
String with credentials redacted as [REDACTED:TYPE]

Assumptions
-----------
- Credentials follow common patterns (API key formats, token types, etc.)
- All potentially sensitive data should be redacted
- Redaction preserves string structure for error clarity

Failure Modes
-------------
None - always returns a valid string (worst case: no redactions made)

Author: Julen Gamboa <julen.gamboa.ds@gmail.com>

Created
-------
2026-02-17

Last Edited
-----------
2026-02-17
"""

import re
from typing import Optional



[docs]
class CredentialRedactor:
	"""Detect and redact sensitive credentials from strings.

	All behaviour is driven by :attr:`PATTERNS`, a mapping of credential-type
	name to regular expression. Every pattern is evaluated with
	``re.IGNORECASE``, and a match is always replaced as a whole by the marker
	``[REDACTED:<type>]``.
	"""

	# Patterns for credential types (Warp's list + custom patterns for Hillstar).
	# Keys are the type names used in the redaction marker; insertion order is
	# the order in which patterns are applied when no explicit list is given.
	PATTERNS = {
		# Warp Secret Redaction List
		# Network & Infrastructure
		"ipv4_address": r"\b((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.?\b){4}\b",
		"ipv6_address": r"\b((([0-9A-Fa-f]{1,4}:){1,6}:)|(([0-9A-Fa-f]{1,4}:){7}))([0-9A-Fa-f]{1,4})\b",
		# NOTE: the upstream pattern used the range `A-z`, which also admits the
		# punctuation between 'Z' and 'a' ([ \ ] ^ _ `); `A-Z` is what was meant.
		"mac_address": r"\b((([a-zA-Z0-9]{2}[-:]){5}([a-zA-Z0-9]{2}))|(([a-zA-Z0-9]{2}:){5}([a-zA-Z0-9]{2})))\b",

		# PII
		"phone_number": r"\b(\+\d{1,2}\s)?\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}\b",

		# Cloud Credentials
		"aws_access_id": r"\b(AKIA|A3T|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{12,}\b",

		# Tokens & Keys (Warp list + fixes for real key formats)
		"anthropic_key": r"sk-ant-[a-zA-Z0-9\-_]{6,}",
		"openai_key": r"sk-[a-zA-Z0-9\-_]{10,}",
		"fireworks_key": r"fw_[a-zA-Z0-9]{10,}",
		"google_key": r"AIza[0-9A-Za-z\-_]{10,}",
		"google_oauth_id": r"\b[0-9]+-[0-9A-Za-z_]{32}\.apps\.googleusercontent\.com\b",
		"github_pat_classic": r"\bghp_[A-Za-z0-9_]{36}\b",
		"github_pat_fine_grained": r"\bgithub_pat_[A-Za-z0-9_]{82}\b",
		"github_oauth_token": r"\bgho_[A-Za-z0-9_]{36}\b",
		"github_user_to_server": r"\bghu_[A-Za-z0-9_]{36}\b",
		"github_server_to_server": r"\bghs_[A-Za-z0-9_]{36}\b",
		"stripe_key": r"\b(?:r|s)k_(test|live)_[0-9a-zA-Z]{24}\b",
		"firebase_domain": r"\b([a-z0-9-]){1,30}(\.firebaseapp\.com)\b",
		# Same `A-z` -> `A-Z` correction as mac_address ('_' is listed explicitly).
		"json_web_token": r"\b(ey[a-zA-Z0-9_\-=]{10,}\.){2}[a-zA-Z0-9_\-=]{10,}\b",
		"slack_app_token": r"\bxapp-[0-9]+-[A-Za-z0-9_]+-[0-9]+-[a-f0-9]+\b",

		# Custom patterns for Hillstar
		"bearer_token": r"Bearer\s+[a-zA-Z0-9\-\._~\+\/=]{6,}",
		"api_key_generic": r"(?:api[_-]?key|api[_-]?token)\s*[=:]\s*['\"]?([a-zA-Z0-9\-_\.]+)['\"]?",
		"authorization": r"(?:Authorization|X-API-Key)\s*[=:]\s*['\"]?([a-zA-Z0-9\-_\.]+)['\"]?",
		"credentials_json": r'"(?:api_key|apiKey|access_token|accessToken|password|secret)"\s*:\s*"([^"]+)"',
		"url_password": r"(?:https?://)[^:]+:([a-zA-Z0-9\-_\.]+)@",
		"env_var_value": r"(?:ANTHROPIC_API_KEY|OPENAI_API_KEY|MISTRAL_API_KEY|GOOGLE_API_KEY)\s*=\s*([a-zA-Z0-9\-_\.]+)",
	}

	@staticmethod
	def redact(text: Optional[str], include_patterns: Optional[list] = None) -> str:
		"""
		Redact all detected credentials from text.

		Patterns are applied one after another, each against the output of the
		previous one. The *entire* regex match is replaced by the marker, so for
		the key/value style patterns (``api_key_generic``, ``authorization``,
		``credentials_json``, ``url_password``, ``env_var_value``) the key name
		and surrounding quotes are replaced together with the secret.

		Args:
			text: String potentially containing credentials. ``None`` yields an
				empty string; any other non-string value is converted with
				``str()`` before matching.
			include_patterns: Pattern names to apply, in the given order
				(default: all, in ``PATTERNS`` order). ``None`` or an empty list
				selects every pattern; names not present in ``PATTERNS`` are
				skipped. A single name passed as a bare string is treated as a
				one-element list.

		Returns:
			String with credentials redacted as [REDACTED:TYPE]

		Examples:
			>>> CredentialRedactor.redact("My key is sk-ant-abc123def456")
			'My key is [REDACTED:anthropic_key]'

			>>> CredentialRedactor.redact('api_key = "secret-value"')
			'[REDACTED:api_key_generic]'
		"""
		if text is None:
			return ""

		# Coerce once so the declared `-> str` return type always holds.
		result = str(text)
		if not result:
			return result

		# A bare string would otherwise be iterated character by character,
		# match no pattern name, and silently redact nothing.
		if isinstance(include_patterns, str):
			include_patterns = [include_patterns]
		selected = include_patterns or list(CredentialRedactor.PATTERNS)

		for pattern_name in selected:
			pattern = CredentialRedactor.PATTERNS.get(pattern_name)
			if pattern is None:
				continue

			marker = f"[REDACTED:{pattern_name}]"
			# A callable replacement inserts the marker verbatim (no backslash or
			# group-reference processing of the replacement text).
			result = re.sub(pattern, lambda _match, _m=marker: _m, result, flags=re.IGNORECASE)

		return result

	@staticmethod
	def contains_credentials(text: Optional[str]) -> bool:
		"""
		Check if text contains any detected credentials.

		Args:
			text: String to check (returns False if None or empty)

		Returns:
			True if at least one pattern in ``PATTERNS`` matches, False otherwise
		"""
		if not text:
			return False

		haystack = str(text)
		return any(
			re.search(pattern, haystack, re.IGNORECASE)
			for pattern in CredentialRedactor.PATTERNS.values()
		)

	@staticmethod
	def get_redaction_types(text: Optional[str]) -> list:
		"""
		Identify which credential types are present in text.

		Every pattern is tested against the original, unredacted text, so one
		secret can be reported under several names (an Anthropic key also
		satisfies the broader ``openai_key`` pattern).

		Args:
			text: String to analyze (returns an empty list if None or empty)

		Returns:
			List of pattern names detected, in ``PATTERNS`` order

		Example:
			>>> CredentialRedactor.get_redaction_types("api_key=sk-ant-abc123def456")
			['anthropic_key', 'openai_key', 'api_key_generic']
		"""
		if not text:
			return []

		haystack = str(text)
		return [
			pattern_name
			for pattern_name, pattern in CredentialRedactor.PATTERNS.items()
			if re.search(pattern, haystack, re.IGNORECASE)
		]




# Convenience function for one-off redaction

[docs]
def redact(text: Optional[str]) -> str:
	"""Redact credentials from a string using every known pattern.

	Module-level shortcut that forwards to ``CredentialRedactor.redact``
	without restricting the set of patterns.

	Args:
		text: String potentially containing credentials (``None`` yields an
			empty string)

	Returns:
		The text with each detected credential replaced by a redaction marker
	"""
	redacted = CredentialRedactor.redact(text)
	return redacted




[docs]
def contains_credentials(text: Optional[str]) -> bool:
	"""Report whether a string contains any detectable credential.

	Module-level shortcut that forwards to
	``CredentialRedactor.contains_credentials``.

	Args:
		text: String to check (``None`` yields False)

	Returns:
		True if at least one credential pattern matches, False otherwise
	"""
	found = CredentialRedactor.contains_credentials(text)
	return found