commit
23d8dd9379
1 changed files with 120 additions and 0 deletions
@ -0,0 +1,120 @@ |
|||
#!/usr/bin/python |
|||
# coding=utf-8 |
|||
|
|||
import httplib; |
|||
from HTMLParser import HTMLParser |
|||
from optparse import OptionParser |
|||
|
|||
parser = OptionParser() |
|||
parser.add_option("-d", "--dico", dest="dico", help="choose the dictionnary to use (ex: enfr, fren)", metavar="DICO") |
|||
(options, args) = parser.parse_args() |
|||
|
|||
if len(args) != 1: |
|||
parser.error("incorrect number of arguments") |
|||
|
|||
word = args[0] |
|||
|
|||
if not options.dico: |
|||
dico = "enfr" |
|||
else: |
|||
dico = options.dico |
|||
|
|||
class FailedException(Exception): |
|||
def __init__(self, failed_request, status, reason, data): |
|||
self.failed_request = failed_request |
|||
self.status = status |
|||
self.reason = reason |
|||
self.data = data |
|||
|
|||
|
|||
class WordReferenceHTMLParser(HTMLParser): |
|||
translation = dict() |
|||
|
|||
def handle_starttag(self, tag, attrs): |
|||
attrs_dict = dict(attrs) |
|||
|
|||
if tag == "td" \ |
|||
and attrs_dict.has_key('class') \ |
|||
and (attrs_dict['class'] == 'FrCN2'): |
|||
pass |
|||
|
|||
elif self.waitForSpan and tag == "span": |
|||
pass |
|||
|
|||
def handle_data(self, data): |
|||
pass |
|||
|
|||
def handle_endtag(self, tag): |
|||
pass |
|||
|
|||
class CookieStore: |
|||
cookies = dict() |
|||
|
|||
def add_cookies(self, cookies_str): |
|||
if cookies_str: |
|||
for c in re.split(', ', cookies_str): |
|||
cookie = re.split('=', re.split('; ', c, 1)[0], 1) |
|||
self.cookies[cookie[0]] = cookie[1] |
|||
|
|||
def cookies_as_string(self): |
|||
cookies_str = '' |
|||
first = True |
|||
for (k, v) in self.cookies.items(): |
|||
if not first: |
|||
cookies_str += '; ' |
|||
else: |
|||
first = False |
|||
|
|||
cookies_str += '='.join([k, v]) |
|||
return cookies_str |
|||
def has_cookie(self): |
|||
return len(self.cookies) > 0 |
|||
|
|||
|
|||
class ChainedHttpRequest: |
|||
def __init__(self, method, url, expectedCode, next_request = None, headers = {}, body = ''): |
|||
self.method = method |
|||
self.url = url |
|||
self.expectedCode = expectedCode |
|||
self.headers = headers |
|||
self.body = body |
|||
self.next_request = next_request |
|||
|
|||
def process(self, connection, cookie_store, referer = None): |
|||
headers = self.headers.copy() |
|||
|
|||
if referer: |
|||
headers['Referer'] = referer |
|||
|
|||
if cookie_store.has_cookie(): |
|||
headers['Cookie'] = cookie_store.cookies_as_string() |
|||
|
|||
connection.request(self.method, self.url, self.body, headers) |
|||
response = connection.getresponse() |
|||
|
|||
if response.status == self.expectedCode: |
|||
cookie_store.add_cookies(response.getheader('Set-Cookie')) |
|||
|
|||
if self.next_request: |
|||
response.read() |
|||
return self.next_request.process(connection, cookie_store, self.url) |
|||
else: |
|||
return response.read() |
|||
else: |
|||
raise FailedException(self, response.status, response.reason, response.read()) |
|||
|
|||
|
|||
request = ChainedHttpRequest('GET', '/' + dico + '/' + word, httplib.OK) |
|||
request = ChainedHttpRequest('GET', '/', httplib.OK, request) |
|||
|
|||
store = CookieStore() |
|||
|
|||
connection = httplib.HTTPConnection('www.wordreference.com') |
|||
connection.set_debuglevel(9) |
|||
|
|||
res = request.process(connection, store, 'http://www.wordreference.com/') |
|||
|
|||
parser = WordReferenceHTMLParser() |
|||
parser.feed(res) |
|||
|
|||
|
|||
Loading…
Reference in new issue