# SPDX-License-Identifier: MIT
#
# Copyright The SCons Foundation
"""
SCons hash utility routines.
Routines for working with content and signature hashes.
"""
import functools
import hashlib
import sys
from typing import Optional, Union
from .sctypes import to_bytes
# Default hash function and format. SCons-internal.
# DEFAULT_HASH_FORMATS: algorithm names probed, in this order, by
# _set_allowed_viable_default_hashes().
DEFAULT_HASH_FORMATS = ['md5', 'sha1', 'sha256']
# ALLOWED_HASH_FORMATS: the entries of DEFAULT_HASH_FORMATS whose hashlib
# constructors could actually be called; rebuilt from scratch by
# _set_allowed_viable_default_hashes().
ALLOWED_HASH_FORMATS = []
# _HASH_FUNCTION: name of the hashlib attribute used when no explicit format
# is requested (see _get_hash_object()); None means no default is selected.
_HASH_FUNCTION = None
# _HASH_FORMAT: NOTE(review): not read or written by the code visible here;
# presumably the user-requested format name -- confirm against its users.
_HASH_FORMAT = None
[docs]
def _attempt_init_of_python_3_9_hash_object(hash_function_object, sys_used=sys):
    """Construct a hash object, marking it as non-security use when possible.

    From Python 3.9 onwards, :mod:`hashlib` constructors accept the keyword
    argument *usedforsecurity*. Passing ``False`` allows continued use of
    algorithms deprecated by FIPS or by Python itself; SCons prefers MD5
    as a short, 32 character hash that resists accidental collisions,
    not as a security measure.

    Earlier Python versions expose a native function wrapper that rejects
    the optional keyword, so there the constructor is called bare.

    Args:
        hash_function_object: a hashlib constructor (such as
            ``hashlib.md5``), or ``None``.
        sys_used: object exposing ``version_info.major`` and
            ``version_info.minor``; defaults to :mod:`sys`.

    Returns:
        Whatever the constructor returns, or ``None`` if
        *hash_function_object* is ``None``.

    Raises:
        ValueError: propagated from the constructor, e.g. when the
            algorithm is blocked for FIPS compliance. Diagnosing and
            reporting it is the caller's responsibility.
    """
    if hash_function_object is None:
        return None

    # A plain major/minor comparison is sufficient here: the
    # 'usedforsecurity' flag is documented as available from 3.9. See
    # https://docs.python.org/3/library/hashlib.html#:~:text=usedforsecurity
    version = sys_used.version_info
    if version.major < 3 or (version.major == 3 and version.minor < 9):
        # Pre-3.9: no keyword support. On FIPS-enabled Linux the OpenSSL
        # backed hashlib raises ValueError right here on first init.
        return hash_function_object()

    return hash_function_object(usedforsecurity=False)
[docs]
def _set_allowed_viable_default_hashes(hashlib_used, sys_used=sys) -> None:
    """Record which of the default hash algorithms can actually be created.

    This module is sometimes used before the user-selected hash algorithm
    has been set, meaning that on FIPS-compliant systems the library would
    default-initialize MD5 and throw an exception in set_hash_format. A
    common case is using the SConf options, which can run prior to main,
    and thus ignore the options.hash_format variable.

    Each name in ``DEFAULT_HASH_FORMATS`` is looked up on *hashlib_used*
    and test-initialized; the names that succeed are stored, in order, in
    the module-level ``ALLOWED_HASH_FORMATS`` list. On Python >= 3.9 MD5 is
    expected to stay usable because the constructor is called with
    ``usedforsecurity=False``.

    Args:
        hashlib_used: module-like object providing the hash constructors
            as attributes (normally :mod:`hashlib`).
        sys_used: object providing ``version_info``; defaults to :mod:`sys`.

    Raises:
        SConsEnvironmentError: if none of the default algorithms could be
            initialized. The most recent ``ValueError`` from the hashing
            library (if any) is chained as the cause.
    """
    global ALLOWED_HASH_FORMATS
    last_error = None

    # Start from an empty list on every call; otherwise repeated calls
    # would keep appending the same formats to the existing list.
    ALLOWED_HASH_FORMATS = []

    for algorithm in DEFAULT_HASH_FORMATS:
        constructor = getattr(hashlib_used, algorithm, None)
        if constructor is None:
            # The hashing library does not offer this algorithm at all.
            continue

        # The library claims to support it; check that it can be called.
        # In FIPS mode a disallowed algorithm raises ValueError on init.
        try:
            _attempt_init_of_python_3_9_hash_object(constructor, sys_used)
        except ValueError as e:
            last_error = e
        else:
            ALLOWED_HASH_FORMATS.append(algorithm)

    if not ALLOWED_HASH_FORMATS:
        from SCons.Errors import (  # pylint: disable=import-outside-toplevel
            SConsEnvironmentError,
        )

        # Chain the exception with the most recent error from hashlib.
        # Note the trailing space: adjacent string literals are joined
        # verbatim, so without it the two sentences run together.
        raise SConsEnvironmentError(
            'No usable hash algorithms found. '
            'Most recent error from hashlib attached in trace.'
        ) from last_error
# Probe the real hashlib when this module is imported, so that
# ALLOWED_HASH_FORMATS is populated before any hashing is requested.
# This raises if none of the DEFAULT_HASH_FORMATS can be initialized.
_set_allowed_viable_default_hashes(hashlib)
[docs]
def _attempt_get_hash_function(hash_name, hashlib_used=hashlib, sys_used=sys):
    """Check whether the named hash function can be initialized.

    Args:
        hash_name: name of the algorithm to look up on *hashlib_used*.
        hashlib_used: module-like object providing hash constructors;
            defaults to :mod:`hashlib`.
        sys_used: object providing ``version_info``; defaults to :mod:`sys`.

    Returns:
        *hash_name* if the algorithm exists and a hash object could be
        created from it, otherwise ``None``.
    """
    usable_name = None
    try:
        constructor = getattr(hashlib_used, hash_name, None)
        if constructor is not None:
            _attempt_init_of_python_3_9_hash_object(constructor, sys_used)
            usable_name = hash_name
    except ValueError:
        # Initialization typically fails like this when FIPS mode is
        # enabled and the algorithm is disallowed. Report the algorithm
        # as unavailable rather than propagating the error.
        usable_name = None
    return usable_name
# Ensure that the hash format is initialized when this module is imported,
# in case either:
# 1. This code is running in a unit test.
# 2. This code is running in a consumer that does hash operations while
#    SConscript files are being loaded.
# NOTE(review): set_hash_format is not defined in the part of the file
# visible here; confirm it is defined above this call.
set_hash_format(None)
[docs]
def get_current_hash_algorithm_used():
    """Return the name of the hash algorithm currently in use.

    This is the value of the module-level ``_HASH_FUNCTION``.

    With Python >= 3.9 the result is expected to be md5. With Python
    <= 3.8 it is expected to be md5 on non-FIPS-mode platforms, and sha1
    or sha256 on FIPS-mode Linux platforms.

    Mainly useful in tests, where the expected value is one of N distinct
    hashes and the test needs to know which one to compare against.
    """
    current_algorithm = _HASH_FUNCTION
    return current_algorithm
[docs]
def _get_hash_object(hash_format, hashlib_used=hashlib, sys_used=sys):
    """Allocate a hash object using the requested hash format.

    Args:
        hash_format: name of the hash format to use, or ``None`` to use
            the module's current default (``_HASH_FUNCTION``).
        hashlib_used: module-like object the hash constructor is looked
            up on; defaults to :mod:`hashlib`. It is honored both for the
            default format and for an explicitly requested one.
        sys_used: object providing ``version_info``; defaults to :mod:`sys`.

    Returns:
        hashlib object. When *hash_format* is ``None`` and the default
        algorithm is not an attribute of *hashlib_used*, ``None`` is
        returned.

    Raises:
        UserError: if *hash_format* is ``None`` and no default hash
            function has been set, or if *hash_format* is not available
            on *hashlib_used*.
    """
    if hash_format is None:
        if _HASH_FUNCTION is None:
            from SCons.Errors import (  # pylint: disable=import-outside-toplevel
                UserError,
            )

            raise UserError(
                'There is no default hash function. Did you call '
                'a hashing function before SCons was initialized?'
            )
        return _attempt_init_of_python_3_9_hash_object(
            getattr(hashlib_used, _HASH_FUNCTION, None), sys_used
        )

    # Look the algorithm up on hashlib_used (not the global hashlib) so
    # that a caller-supplied hashing module is respected here too.
    if not hasattr(hashlib_used, hash_format):
        from SCons.Errors import UserError  # pylint: disable=import-outside-toplevel

        raise UserError(
            f'Hash format "{hash_format}" is not available in your Python interpreter.'
        )

    return _attempt_init_of_python_3_9_hash_object(
        getattr(hashlib_used, hash_format), sys_used
    )
[docs]
def hash_signature(s, hash_format=None):
    """Compute the hash signature of a string or bytes value.

    Args:
        s: either string or bytes. Normally should be bytes.
        hash_format: Specify to override default hash format.

    Returns:
        String of hex digits representing the signature.
    """
    hasher = _get_hash_object(hash_format)
    try:
        payload = to_bytes(s)
        hasher.update(payload)
    except TypeError:
        # Fall back to hashing the str() form of the value.
        fallback = to_bytes(str(s))
        hasher.update(fallback)
    return hasher.hexdigest()
[docs]
def hash_file_signature(fname, chunksize: int=65536, hash_format=None):
    """Compute the hash signature of a file's contents.

    The file is opened in binary mode and fed to the hash object in
    pieces of at most *chunksize* bytes.

    Args:
        fname: file to hash
        chunksize: chunk size to read
        hash_format: Specify to override default hash format

    Returns:
        String of hex digits representing the signature
    """
    hasher = _get_hash_object(hash_format)
    with open(fname, "rb") as stream:
        # Prime-and-loop form: stop as soon as read() returns an empty
        # (falsy) chunk.
        # TODO: once the base is Python 3.8+, an assignment expression
        #   in the loop condition can replace the duplicated read() call.
        chunk = stream.read(chunksize)
        while chunk:
            hasher.update(to_bytes(chunk))
            chunk = stream.read(chunksize)
    return hasher.hexdigest()
[docs]
def hash_collect(signatures, hash_format=None):
    """Combine a list of signatures into one aggregate signature.

    A single signature is returned unchanged; otherwise the signatures
    are joined with ``', '`` and the joined text is hashed.

    Args:
        signatures: a list of signatures
        hash_format: Specify to override default hash format

    Returns:
        the aggregate signature
    """
    if len(signatures) != 1:
        combined = ', '.join(signatures)
        return hash_signature(combined, hash_format)
    return signatures[0]
# Set to True by _show_md5_warning() once the deprecation warning for the
# MD5* functions has been issued, so that it is only shown once.
_MD5_WARNING_SHOWN = False
[docs]
def _show_md5_warning(function_name) -> None:
    """Issue the deprecation warning for the legacy MD5 functions, once.

    Args:
        function_name: name of the deprecated function, included in the
            warning text.
    """
    global _MD5_WARNING_SHOWN

    if _MD5_WARNING_SHOWN:
        # Already warned; stay quiet.
        return

    import SCons.Warnings  # pylint: disable=import-outside-toplevel

    SCons.Warnings.warn(
        SCons.Warnings.DeprecatedWarning,
        f"Function {function_name} is deprecated",
    )
    _MD5_WARNING_SHOWN = True
[docs]
def MD5signature(s):
    """Deprecated. Use :func:`hash_signature` instead.

    Triggers the MD5 deprecation warning, then returns the signature of
    *s* computed with the default hash format.
    """
    _show_md5_warning("MD5signature")
    signature = hash_signature(s)
    return signature
[docs]
def MD5filesignature(fname, chunksize: int=65536):
    """Deprecated. Use :func:`hash_file_signature` instead.

    Triggers the MD5 deprecation warning, then returns the signature of
    the file *fname*, read *chunksize* bytes at a time, computed with the
    default hash format.
    """
    _show_md5_warning("MD5filesignature")
    signature = hash_file_signature(fname, chunksize=chunksize)
    return signature
[docs]
def MD5collect(signatures):
    """Deprecated. Use :func:`hash_collect` instead.

    Triggers the MD5 deprecation warning, then returns the aggregate of
    *signatures* computed with the default hash format.
    """
    _show_md5_warning("MD5collect")
    aggregate = hash_collect(signatures)
    return aggregate
# Local Variables:
# tab-width:4
# indent-tabs-mode:nil
# End:
# vim: set expandtab tabstop=4 shiftwidth=4: