# http://effbot.org/zone/bencode.htm # http://effbot.org/zone/copyright.htm # # Copyright (C) 1995-2013 by Fredrik Lundh # # By obtaining, using, and/or copying this software and/or its associated # documentation, you agree that you have read, understood, and will comply with # the following terms and conditions: # # Permission to use, copy, modify, and distribute this software and its # associated documentation for any purpose and without fee is hereby granted, # provided that the above copyright notice appears in all copies, and that both # that copyright notice and this permission notice appear in supporting # documentation, and that the name of Secret Labs AB or the author not be used # in advertising or publicity pertaining to distribution of the software # without specific, written prior permission. # # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN # NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR ANY SPECIAL, # INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM # LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR # OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR # PERFORMANCE OF THIS SOFTWARE. import re def tokenize(text, match=re.compile("([idel])|(\d+):|(-?\d+)").match): i = 0 while i < len(text): m = match(text, i) s = m.group(m.lastindex) i = m.end() if m.lastindex == 2: yield "s" yield text[i:i+int(s)] i = i + int(s) else: yield s def decode_item(next, token): if token == "i": # integer: "i" value "e" data = int(next()) if next() != "e": raise ValueError elif token == "s": # string: "s" value (virtual tokens) data = next() elif token == "l" or token == "d": # container: "l" (or "d") values "e" data = [] tok = next() while tok != "e": data.append(decode_item(next, tok)) tok = next() if token == "d": data = dict(zip(data[0::2], data[1::2])) else: raise ValueError return data def decode(text): try: src = tokenize(text) data = decode_item(src.next, src.next()) for token in src: # look for more tokens raise SyntaxError("trailing junk") except (AttributeError, ValueError, StopIteration): raise SyntaxError("syntax error") return data