#! /usr/bin/env python3
"""Send a verbose HEAD request to the URL given on the command line.

The URL is first normalised: the host is converted to its ASCII (IDNA)
form and the user info, path, query and fragment are percent-escaped.
The HEAD request is then built from that normalised URL, so the path and
query that go over the wire are always valid ASCII.  ``http.client``
debug output (the request, the status line and the response headers) is
printed to stdout.

Usage: script URL
"""

from http.client import HTTPConnection, HTTPSConnection
from urllib.parse import urlsplit, parse_qsl, urlencode, quote, quote_plus
import sys
import urllib.request

# Characters left untouched when escaping a path: the segment separator,
# the characters RFC 3986 allows literally inside a path segment, and "%"
# so that escapes already present in the input are not escaped twice.
PATH_SAFE = "/%:@!$&'()*+,;="


def encode_host(host):
    """Return *host* in ASCII (IDNA) form, bracketed if it is an IPv6 literal."""
    ascii_host = host.encode('idna').decode('ascii')
    if ':' in ascii_host:
        # urlsplit() strips the brackets from IPv6 literals; they have to be
        # restored or the port separator becomes ambiguous.
        return '[' + ascii_host + ']'
    return ascii_host


def normalize_url(url):
    """Return *url* re-assembled from escaped, ASCII-only components.

    The user name and password are escaped with ``quote``, the host is
    IDNA-encoded, the path is escaped without touching existing ``%xx``
    sequences, the query is decoded and re-encoded (blank values are kept)
    and the fragment is escaped with ``quote_plus``.
    """
    parts = urlsplit(url)
    result = parts.scheme + "://"
    if parts.username:
        result += quote(parts.username)
        if parts.password:
            result += ':' + quote(parts.password)
        result += '@'
    if parts.hostname:
        result += encode_host(parts.hostname)
        if parts.port:
            result += ':%d' % parts.port
    if parts.path:
        result += quote(parts.path, safe=PATH_SAFE)
    if parts.query:
        # keep_blank_values: "?a=" must not silently lose its parameter.
        pairs = parse_qsl(parts.query, keep_blank_values=True)
        result += '?' + urlencode(pairs)
    if parts.fragment:
        result += '#' + quote_plus(parts.fragment)
    return result


def request_target(url):
    """Return the path-plus-query part of *url* to put on the request line.

    An empty path becomes "/", so that a URL with a query but no path
    still yields a valid target.
    """
    parts = urlsplit(url)
    target = parts.path or "/"
    if parts.query:
        target += '?' + parts.query
    return target


def head_request(url):
    """Send a HEAD request for the already normalised *url*.

    An ``HTTPSConnection`` is used for the "https" scheme and an
    ``HTTPConnection`` for everything else.  Debug level 1 makes
    ``http.client`` print the request and the response headers.  The
    connection is closed before returning, also when the request fails.

    Returns the ``HTTPResponse`` object obtained from the server.
    """
    parts = urlsplit(url)
    if parts.scheme == "https":
        server = HTTPSConnection(parts.hostname, parts.port)
    else:
        server = HTTPConnection(parts.hostname, parts.port)
    server.set_debuglevel(1)
    try:
        server.putrequest("HEAD", request_target(url))

        # Some sites used to return broken HTML, or even a broken HTTP
        # response, to clients without a "compatible" user agent.  It is
        # unclear whether such sites still exist, but sending the header
        # does no harm.
        # UPDATE: a number of sites forbid "Mozilla compatible" agents, so
        # the urllib identification is sent.
        urllib_version = urllib.request.__version__
        client_version = "Python-urllib/%s" % urllib_version
        server.putheader('User-agent', client_version)

        server.putheader('Accept-Charset', "koi8-r;q=1.0")
        server.endheaders()
        return server.getresponse()
    finally:
        server.close()


def main(argv=None):
    """Run the script: normalise ``argv[1]`` and send the HEAD request.

    Exits with an error message when no URL is given or when the URL has
    no host part (for example when the scheme is missing).
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.exit("Usage: %s URL" % argv[0])
    url = normalize_url(argv[1])
    if not urlsplit(url).hostname:
        sys.exit("%s: no host in URL %r" % (argv[0], argv[1]))
    head_request(url)


if __name__ == "__main__":
    main()