I've timed the existing answers; the code to reproduce is below. TL;DR: `bytes(seq).decode()`
is by far the fastest. Results:
test_bytes_decode : 12.8046 µs/rep
test_join_map : 62.1697 µs/rep
test_array_library : 63.7088 µs/rep
test_join_list : 112.021 µs/rep
test_join_iterator : 171.331 µs/rep
test_naive_add : 286.632 µs/rep
Setup was CPython 3.8.2 (32-bit), Windows 10, i7-2600 3.4GHz
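Interesting observations:
- `bytes(seq).decode()` is roughly 5x faster than the next-best approach (`''.join(map(chr, seq))`).
- `''.join(map(chr, seq))` and the `array` approach are nearly tied (about 62 µs vs. 64 µs).
- Passing `str.join` a list comprehension is noticeably faster than passing it a generator expression (about 112 µs vs. 171 µs).
- Naive `+=` concatenation is the slowest, roughly 22x slower than `bytes(seq).decode()`.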
Code to reproduce is here:
import array, string, timeit, random
from collections import namedtuple
# Thomas Wouters (https://stackoverflow.com/a/180615/13528444)
def test_join_iterator(seq):
return ''.join(chr(c) for c in seq)
# community wiki (https://stackoverflow.com/a/181057/13528444)
def test_join_map(seq):
return ''.join(map(chr, seq))
# Thomas Vander Stichele (https://stackoverflow.com/a/180617/13528444)
def test_join_list(seq):
return ''.join([chr(c) for c in seq])
# Toni Ruža (https://stackoverflow.com/a/184708/13528444)
# Also from https://www.python.org/doc/essays/list2str/
def test_array_library(seq):
return array.array('b', seq).tobytes().decode() # Updated from tostring() for Python 3
# David White (https://stackoverflow.com/a/34246694/13528444)
def test_naive_add(seq):
output = ''
for c in seq:
output += chr(c)
return output
# Timo Herngreen (https://stackoverflow.com/a/55509509/13528444)
def test_bytes_decode(seq):
return bytes(seq).decode()
# Benchmark fixtures.
# RESULT is the expected output: 1000 characters drawn (with replacement,
# uniform weights) from string.printable.
RESULT = ''.join(random.choices(string.printable, k=1000))
# INT_SEQ is the input handed to every candidate: the code point of each
# character of RESULT, in order.
INT_SEQ = list(map(ord, RESULT))
# Number of timed executions per candidate.
REPS = 10000
if __name__ == '__main__':
    # Discover every benchmark candidate defined in this module. globals() is
    # snapshotted with list() so that names bound while the comprehension runs
    # cannot change the dict during iteration, and non-callables that merely
    # share the 'test_' prefix are skipped.
    tests = {
        name: test
        for (name, test) in list(globals().items())
        if name.startswith('test_') and callable(test)
    }

    Result = namedtuple('Result', ['name', 'passed', 'time', 'reps'])

    # For each candidate: check it reproduces RESULT from INT_SEQ, then time
    # REPS executions and store the mean seconds per execution.
    results = [
        Result(
            name=name,
            passed=test(INT_SEQ) == RESULT,
            time=timeit.Timer(
                stmt=f'{name}(INT_SEQ)',
                setup=f'from __main__ import INT_SEQ, {name}'
            ).timeit(REPS) / REPS,
            reps=REPS)
        for name, test in tests.items()
    ]

    # Fastest first; candidates that produced a wrong result sort last.
    results.sort(key=lambda r: r.time if r.passed else float('inf'))

    def seconds_per_rep(secs):
        """Format a duration in seconds using the largest fitting unit.

        Thresholds are inclusive, so exactly 1 s is shown in seconds rather
        than as 1000 ms (and likewise for the smaller units).
        """
        (unit, amount) = (
            ('s', secs) if secs >= 1
            else ('ms', secs * 10 ** 3) if secs >= (10 ** -3)
            else ('µs', secs * 10 ** 6) if secs >= (10 ** -6)
            else ('ns', secs * 10 ** 9))
        return f'{amount:.6} {unit}/rep'

    # default=0 keeps the report step from raising when no candidate exists.
    max_name_length = max((len(name) for name in tests), default=0)
    for r in results:
        print(
            r.name.rjust(max_name_length),
            ':',
            'failed' if not r.passed else seconds_per_rep(r.time))