*(?P.+?) *
""",
'image' : """
CDU:.*? | (?P.*?) | ',
'encuadernacion': '
Encuadernación:.*? | .*?(?P.*?)',
'series' : ' | Colección:.*? | .*?(?P.*?)'
- }
+ }
# Compile patterns objects
self.__regExpsPO = {}
- for k, pattern in self.__regExps.iteritems():
+ for k, pattern in self.__regExps.items():
self.__regExpsPO[k] = re.compile(pattern)
self.__domTree = BasicTellicoDOM()
@@ -296,10 +296,10 @@ class MinisterioCulturaParser:
"""
Fetch HTML data from url
"""
-
+
try:
- u = urllib2.urlopen(url)
- except Exception, e:
+ u = urllib.request.urlopen(url)
+ except Exception as e:
u.close()
sys.exit("""
Network error while getting HTML content.
@@ -312,7 +312,7 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage:
def __fetchBookLinks(self):
"""
- Retrieve all links related to the search. self.__data contains HTML content fetched by self.__getHTMLContent()
+ Retrieve all links related to the search. self.__data contains HTML content fetched by self.__getHTMLContent()
that need to be parsed.
"""
@@ -333,10 +333,10 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage:
data['comments'] = []
# Empty string if series not available
- data['series_num'] = NULLSTRING
+ data['series_num'] = NULLSTRING
data['translator'] = NULLSTRING
- for name, po in self.__regExpsPO.iteritems():
+ for name, po in self.__regExpsPO.items():
data[name] = NULLSTRING
matches[name] = re.search(self.__regExps[name], self.__data, re.S | re.I)
@@ -391,22 +391,22 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage:
elif name == 'cdu':
data['comments'].append('CDU: ' + matches[name].group('cdu').strip())
-
+
elif name == 'notas':
data['comments'].append(matches[name].group('notas').strip())
-
+
elif name == 'series':
d = matches[name].group('series').strip()
d = re.sub(' ', ' ', d)
data[name] = d
# data[name] can contain something like 'Byblos, 162/24'
- # Maybe better to add the reg exp to get seriesNum in self.__regExps
+ # Maybe better to add the reg exp to get seriesNum in self.__regExps
p = re.compile('[0-9]+$')
s = re.search(p, data[name])
if s:
- # if series ends with a number, it seems that is a
+ # if series ends with a number, it seems that is a
# number of the book inside the series. We save in seriesNum
data['series_num'] = s.group()
@@ -434,7 +434,7 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage:
# Sometimes, the search engine outputs some image between a elements
if d.strip()[:4] != ' 0: data['medium'] = media
-
+
# get all tags
c.execute("SELECT name FROM tags WHERE tag_id IN (SELECT tag_id FROM movie_tag WHERE movie_id=%s)" % id)
tags = list([row[0].encode('utf-8') for row in c.fetchall()])
if len(tags) > 0: data['tag'] = tags
-
+
# get all languages
c.execute("SELECT name FROM languages WHERE lang_id IN (SELECT lang_id FROM movie_lang WHERE movie_id=%s)" % id)
langs = list([row[0].encode('utf-8') for row in c.fetchall()])
if len(langs) > 0: data['language'] = langs
-
+
return data
|