130 lines
4.0 KiB
Python
130 lines
4.0 KiB
Python
# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
|
|
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
|
|
|
|
"""
|
|
Creates a human-readable identifier, using letters and digits,
|
|
avoiding ambiguous numbers and letters. hash_identifier can be used
|
|
to create compact representations that are unique for a certain string
|
|
(or concatenation of strings)
|
|
"""
|
|
|
|
try:
|
|
from hashlib import md5
|
|
except ImportError:
|
|
from md5 import md5
|
|
|
|
import six
|
|
|
|
# Alphabet used for identifiers.  Characters that are easy to misread
# are left out: the digits 0 and 1 and the letters i, l, o and s.
good_characters = (
    "23456789"                  # digits 2-9
    "abcdefghjkmnpqrtuvwxyz"    # lowercase letters minus i, l, o, s
)

# Radix of the identifier encoding: one "digit" per alphabet character.
base = len(good_characters)
|
|
|
|
def make_identifier(number):
    """
    Encodes a number as an identifier.

    ``number`` must be a non-negative integer.  It is written in base
    ``base`` using the characters of ``good_characters`` as digits,
    with the least significant digit first.  Zero encodes to the
    empty string.

    Raises ``ValueError`` if ``number`` is not an integer or is
    negative.
    """
    if not isinstance(number, six.integer_types):
        # Wrap in a 1-tuple so that a tuple argument is reported with
        # %r instead of being unpacked as the format arguments (which
        # would raise TypeError or print the wrong value).
        raise ValueError(
            "You can only make identifiers out of integers (not %r)"
            % (number,))
    if number < 0:
        raise ValueError(
            "You cannot make identifiers out of negative numbers: %r"
            % number)
    digits = []
    while number:
        # divmod uses floor division, which is what the encoding
        # relies on; number is known to be non-negative here.
        number, remainder = divmod(number, base)
        digits.append(good_characters[remainder])
    return ''.join(digits)
|
|
|
|
def hash_identifier(s, length, pad=True, hasher=md5, prefix='',
                    group=None, upper=False):
    """
    Hashes the string (with the given hashing module), then turns that
    hash into an identifier of the given length (using modulo to
    reduce the length of the identifier).  If ``pad`` is False, then
    the minimum-length identifier will be used; otherwise the
    identifier will be padded on the left with the first character of
    ``good_characters`` as necessary.

    ``s`` may be text (encoded as UTF-8 before hashing), bytes (hashed
    as-is) or any other object (converted with ``str()`` first).

    ``hasher`` is a callable that accepts the bytes and returns an
    object with a ``digest()`` method; if it is not callable, its
    ``new`` attribute is used instead, so the sha/md5 modules are
    accepted as well.

    ``prefix`` will be added last, and does not count towards the
    target length.  ``group`` will group the characters with ``-`` in
    the given lengths, and also does not count towards the target
    length.  Groups are counted from the right, so the first group
    may be shorter.  E.g., ``group=4`` will cause an identifier like
    ``cy-2dr6-rg46``.  Grouping occurs before the prefix.

    If ``upper`` is true the identifier (but not the prefix) is
    upper-cased.

    Raises ``ValueError`` if ``length`` is greater than 26 while
    ``hasher`` is ``md5``, or if ``group`` is negative.
    """
    if not callable(hasher):
        # Accept sha/md5 modules as well as callables
        hasher = hasher.new
    if length > 26 and hasher is md5:
        raise ValueError(
            "md5 cannot create hashes longer than 26 characters in "
            "length (you gave %s)" % length)
    if group and group < 0:
        # A negative size would never consume the identifier in the
        # grouping step below; reject it instead of looping forever.
        raise ValueError(
            "group must be a positive number (you gave %r)" % (group,))
    if isinstance(s, six.text_type):
        s = s.encode('utf-8')
    elif not isinstance(s, six.binary_type):
        s = str(s)
        if six.PY3:
            s = s.encode('utf-8')
    bin_hash = hasher(s).digest()
    modulo = base ** length
    number = 0
    # Read the digest as a big-endian integer, reducing at every step
    # so the intermediate value stays below ``modulo``.  Iterating a
    # bytearray yields integers on both Python 2 and Python 3.
    for byte in bytearray(bin_hash):
        number = (number * 256 + byte) % modulo
    ident = make_identifier(number)
    if pad:
        ident = good_characters[0] * (length - len(ident)) + ident
    if group:
        # Split into chunks of ``group`` characters aligned to the
        # right-hand end; any short remainder becomes the first chunk.
        head = len(ident) % group
        parts = [ident[:head]] if head else []
        parts.extend(
            ident[start:start + group]
            for start in range(head, len(ident), group))
        ident = '-'.join(parts)
    if upper:
        ident = ident.upper()
    return prefix + ident
|
|
|
|
# doctest tests: each value below is a block of interactive examples,
# keyed by the name of the function it exercises.  They are run by the
# doctest.testmod() call at the bottom of this module.
__test__ = {
    'make_identifier': """
    >>> make_identifier(0)
    ''
    >>> make_identifier(1000)
    'c53'
    >>> make_identifier(-100)
    Traceback (most recent call last):
    ...
    ValueError: You cannot make identifiers out of negative numbers: -100
    >>> make_identifier('test')
    Traceback (most recent call last):
    ...
    ValueError: You can only make identifiers out of integers (not 'test')
    >>> make_identifier(1000000000000)
    'c53x9rqh3'
    """,
    'hash_identifier': """
    >>> hash_identifier(0, 5)
    'cy2dr'
    >>> hash_identifier(0, 10)
    'cy2dr6rg46'
    >>> hash_identifier('this is a test of a long string', 5)
    'awatu'
    >>> hash_identifier(0, 26)
    'cy2dr6rg46cx8t4w2f3nfexzk4'
    >>> hash_identifier(0, 30)
    Traceback (most recent call last):
    ...
    ValueError: md5 cannot create hashes longer than 26 characters in length (you gave 30)
    >>> hash_identifier(0, 10, group=4)
    'cy-2dr6-rg46'
    >>> hash_identifier(0, 10, group=4, upper=True, prefix='M-')
    'M-CY-2DR6-RG46'
    """}
|
|
|
|
if __name__ == '__main__':
    # When executed as a script, run this module's doctests.
    import doctest
    doctest.testmod()
|
|
|