Decoding bencoded data with Python
From TheoryOrg
I wrote this because bencode.py in the BitTorrent source doesn't really handle the nested bencoded data found in scrapes. It is also, on average, 5-6 times faster than the Perl implementation. -- Hackeron
import re
try:
    # psyco is an optional accelerator; decoding works without it, so a
    # missing module is deliberately ignored.
    import psyco  # Optional, 2.5x improvement in speed
    psyco.full()
except ImportError:
    pass

# Matches a single decimal digit.  Raw string so that ``\d`` is passed to
# the regex engine verbatim instead of being an invalid string escape.
decimal_match = re.compile(r'\d')
def bdecode(data):
    """Decode bencoded *data* and return the decoded Python object.

    *data* is split into single characters and the resulting list is
    reversed, so the helper can read the input front-to-back by popping
    from the end of the list.

    Only the first top-level value is decoded; any characters that follow
    it in *data* are left unread and ignored.
    """
    chunks = list(data)
    chunks.reverse()
    return _dechunk(chunks)
def _dechunk(chunks):
item = chunks.pop()
if (item == 'd'):
item = chunks.pop()
hash = {}
while (item != 'e'):
chunks.append(item)
key = _dechunk(chunks)
hash[key] = _dechunk(chunks)
item = chunks.pop()
return hash
elif (item == 'l'):
item = chunks.pop()
list = []
while (item != 'e'):
chunks.append(item)
list.append(_dechunk(chunks))
item = chunks.pop()
return list
elif (item == 'i'):
item = chunks.pop()
num = ''
while (item != 'e'):
num += item
item = chunks.pop()
return int(num)
elif (decimal_match.findall(item)):
num = ''
while decimal_match.findall(item):
num += item
item = chunks.pop()
line = ''
for i in range(1, int(num) + 1):
line += chunks.pop()
return line
raise "Invalid input!"
Note: without psyco, this implementation turns out to be about twice as slow as the original Python implementation by Bram Cohen. Anybody interested in using those encoding and decoding methods can do so with a standalone package from the Cheese Shop (now the Python Package Index, PyPI).

