1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import contextlib
import os
import re
import urllib
import urllib2
def save_image(dir, image_name, image):
    """Write raw image bytes to ``image_name`` inside the directory ``dir``.

    The directory (including missing parents) is created when it does not
    exist yet.  A failure to open the target file is reported on stdout and
    not re-raised.

    Args:
        dir: Destination directory; a trailing path separator is optional.
        image_name: Name of the file to create inside ``dir``.
        image: Binary content to write.
    """
    if not os.path.isdir(dir):
        os.makedirs(dir)
    # os.path.join yields the same path as plain concatenation when ``dir``
    # already ends with a separator, and inserts one when it does not.
    target_path = os.path.join(dir, image_name)
    try:
        image_file = open(target_path, 'wb')
    except IOError as exc:
        # Only the open() failure is handled here; write errors propagate.
        print("I/O error({0}):{1}".format(exc.errno, exc.strerror))
    else:
        # The context manager closes the handle even if write() raises.
        with image_file:
            image_file.write(image)
def get_image(name, student_no, login_url, picture_url, pattern):
    """Log in as a student and download the picture linked from the login page.

    The login form is submitted to ``login_url`` with the fields ``xm``
    (name), ``xh`` (student number) and ``login``.  The response body is
    searched with ``pattern``; its first capture group is appended to
    ``picture_url`` to build the address of the picture.

    Args:
        name: Value sent as the ``xm`` form field.
        student_no: Value sent as the ``xh`` form field.
        login_url: URL the login form is posted to.
        picture_url: Base URL that the captured link suffix is appended to.
        pattern: Regular expression whose group 1 is the picture link suffix.

    Returns:
        The raw bytes of the picture, or ``None`` when ``pattern`` does not
        match the login response.
    """
    # Parameter information
    login_type = 'yes'
    # Configure an opener that handles cookies, so the picture request is
    # sent with whatever cookies the login response set.
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())
    urllib2.install_opener(opener)
    # Build parameters
    params = urllib.urlencode(
        {'xm': name, 'xh': student_no, 'login': login_type})
    # Open the login page; closing() releases the connection even when
    # read() raises (urllib2 responses are not context managers themselves).
    with contextlib.closing(opener.open(login_url, params)) as response:
        login_html = response.read()
    # Search for the image link
    match = re.search(pattern, login_html)
    if match is None:
        # No picture link on the page (original note: the student hasn't
        # taken a picture).
        return None
    # Fetch the image itself
    with contextlib.closing(
            opener.open(picture_url + match.group(1))) as response:
        return response.read()
def grab_all():
    """Download and save the picture of every student listed in software.txt.

    Each line of ``software.txt`` is expected to hold ``name,student_no``
    (any further comma-separated fields are ignored).  For every student
    whose login page links a picture, the picture is saved as
    ``<student_no>_<name>.jpg`` and a confirmation line is printed.

    Blank lines and lines without a comma are skipped.  A failure to open
    ``software.txt`` is reported on stdout and not re-raised.
    """
    # NOTE(review): this is a raw string with doubled backslashes, so the
    # path really contains two backslashes per separator -- confirm intended.
    target_dir = r'D:\\temp\\'
    login_url = 'http://dawww.nju.edu.cn/xwz/login.asp'
    picture_url = 'http://dawww.nju.edu.cn/xwz/picture.asp'
    # Raw literal so the regex escapes are not treated as string escapes;
    # the resulting pattern text is unchanged.
    picture_pattern = r'src="picture.asp(\?i=\d+)"'
    try:
        software_student_file = open('software.txt', 'r')
    except IOError as exc:
        print("I/O error({0}):{1}".format(exc.errno, exc.strerror))
        return
    # The context manager closes the list file even if a download fails.
    with software_student_file:
        for line in software_student_file:
            # Also drop a stray carriage return so it cannot end up in the
            # student number or the file name.
            fields = line.rstrip('\r\n').split(',')
            if len(fields) < 2:
                # Blank or malformed line: nothing to look up.
                continue
            name = fields[0]
            student_no = fields[1]
            image = get_image(name, student_no, login_url, picture_url,
                              picture_pattern)
            if image is not None:
                file_name = student_no + '_' + name + '.jpg'
                save_image(target_dir, file_name, image)
                print(file_name + ' is saved')
# Run the download only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    grab_all()