|
|
|
@ -37,10 +37,10 @@ ISBN (checked) = -i %1
|
|
|
|
|
UPC (checked) = -i %1
|
|
|
|
|
Update (checked) = %{title}
|
|
|
|
|
|
|
|
|
|
** Please note that this script is also part of the Tellico distribution.
|
|
|
|
|
** Please note that this script is also part of the Tellico distribution.
|
|
|
|
|
** You will always find the latest version in the SVN trunk of Tellico
|
|
|
|
|
|
|
|
|
|
SVN Version:
|
|
|
|
|
SVN Version:
|
|
|
|
|
* Removes translators for Authors List
|
|
|
|
|
* Adds translators to translator field
|
|
|
|
|
* Change from "Collection" to "Series"
|
|
|
|
@ -85,7 +85,7 @@ Version 0.1:
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import sys, os, re, md5, random, string
|
|
|
|
|
import urllib, urllib2, time, base64
|
|
|
|
|
import urllib.request, urllib.parse, urllib.error, time, base64
|
|
|
|
|
import xml.dom.minidom, types
|
|
|
|
|
import socket
|
|
|
|
|
|
|
|
|
@ -95,7 +95,7 @@ NULLSTRING = ''
|
|
|
|
|
|
|
|
|
|
VERSION = "0.3.2"
|
|
|
|
|
|
|
|
|
|
ISBN, AUTHOR, TITLE = range(3)
|
|
|
|
|
ISBN, AUTHOR, TITLE = list(range(3))
|
|
|
|
|
|
|
|
|
|
TRANSLATOR_STR = "tr."
|
|
|
|
|
EDLIT_STR = "ed. lit."
|
|
|
|
@ -111,16 +111,16 @@ class BasicTellicoDOM:
|
|
|
|
|
self.__root = self.__doc.createElement('tellico')
|
|
|
|
|
self.__root.setAttribute('xmlns', 'http://periapsis.org/tellico/')
|
|
|
|
|
self.__root.setAttribute('syntaxVersion', '9')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.__collection = self.__doc.createElement('collection')
|
|
|
|
|
self.__collection.setAttribute('title', 'My Books')
|
|
|
|
|
self.__collection.setAttribute('type', '2')
|
|
|
|
|
|
|
|
|
|
self.__fields = self.__doc.createElement('fields')
|
|
|
|
|
self.__fields = self.__doc.createElement('fields')
|
|
|
|
|
# Add all default (standard) fields
|
|
|
|
|
self.__dfltField = self.__doc.createElement('field')
|
|
|
|
|
self.__dfltField.setAttribute('name', '_default')
|
|
|
|
|
|
|
|
|
|
self.__dfltField = self.__doc.createElement('field')
|
|
|
|
|
self.__dfltField.setAttribute('name', '_default')
|
|
|
|
|
|
|
|
|
|
# Add a custom 'Collection' field (Left by reference for
|
|
|
|
|
# the future)
|
|
|
|
|
#self.__customCollectionField = self.__doc.createElement('field')
|
|
|
|
@ -146,18 +146,18 @@ class BasicTellicoDOM:
|
|
|
|
|
|
|
|
|
|
def addEntry(self, movieData):
|
|
|
|
|
"""
|
|
|
|
|
Add a comic entry.
|
|
|
|
|
Add a comic entry.
|
|
|
|
|
Returns an entry node instance
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
d = movieData
|
|
|
|
|
|
|
|
|
|
# Convert all strings to UTF-8
|
|
|
|
|
for i in d.keys():
|
|
|
|
|
if type(d[i]) == types.ListType:
|
|
|
|
|
d[i] = [unicode(d[i][j], 'latin-1').encode('utf-8') for j in range(len(d[i]))]
|
|
|
|
|
elif type(d[i]) == types.StringType:
|
|
|
|
|
d[i] = unicode(d[i], 'latin-1').encode('utf-8')
|
|
|
|
|
for i in list(d.keys()):
|
|
|
|
|
if type(d[i]) == list:
|
|
|
|
|
d[i] = [str(d[i][j], 'latin-1').encode('utf-8') for j in range(len(d[i]))]
|
|
|
|
|
elif type(d[i]) == bytes:
|
|
|
|
|
d[i] = str(d[i], 'latin-1').encode('utf-8')
|
|
|
|
|
|
|
|
|
|
entryNode = self.__doc.createElement('entry')
|
|
|
|
|
entryNode.setAttribute('id', str(self.__currentId))
|
|
|
|
@ -213,7 +213,7 @@ class BasicTellicoDOM:
|
|
|
|
|
translatorNode = self.__doc.createElement('translator')
|
|
|
|
|
translatorNode.appendChild(self.__doc.createTextNode(d['translator']))
|
|
|
|
|
|
|
|
|
|
for name in ( 'title', 'year', 'pub', 'langs', 'keyword', 'ed', 'writers',
|
|
|
|
|
for name in ( 'title', 'year', 'pub', 'langs', 'keyword', 'ed', 'writers',
|
|
|
|
|
'comments', 'pages', 'isbn', 'price', 'series', 'seriesNum', 'translator' ):
|
|
|
|
|
entryNode.appendChild(eval(name + 'Node'))
|
|
|
|
|
|
|
|
|
@ -228,17 +228,17 @@ class BasicTellicoDOM:
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
print nEntry.toxml()
|
|
|
|
|
print(nEntry.toxml())
|
|
|
|
|
except:
|
|
|
|
|
print sys.stderr, "Error while outputing XML content from entry to Tellico"
|
|
|
|
|
print(sys.stderr, "Error while outputing XML content from entry to Tellico")
|
|
|
|
|
|
|
|
|
|
def printXMLTree(self):
    """
    Output the whole XML document to stdout.

    Prints XML_HEADER, then DOCTYPE, then the serialization of the
    root element (self.__root.toxml()), each on its own line.
    """
    # NOTE(review): XML_HEADER and DOCTYPE are module-level names defined
    # outside this view -- presumably the XML prolog and the Tellico
    # doctype declaration; confirm against the constants section.
    print(XML_HEADER)
    print(DOCTYPE)
    print(self.__root.toxml())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class MinisterioCulturaParser:
|
|
|
|
@ -264,11 +264,11 @@ class MinisterioCulturaParser:
|
|
|
|
|
'cdu' : '<th scope="row">CDU:.*?<td><span>(?P<cdu>.*?)</span></td>',
|
|
|
|
|
'encuadernacion': '<th scope="row">Encuadernación:.*?<td>.*?<span>(?P<encuadernacion>.*?)</span>',
|
|
|
|
|
'series' : '<th scope="row">Colección:.*?<td>.*?<span>(?P<series>.*?)</span>'
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# Compile patterns objects
|
|
|
|
|
self.__regExpsPO = {}
|
|
|
|
|
for k, pattern in self.__regExps.iteritems():
|
|
|
|
|
for k, pattern in self.__regExps.items():
|
|
|
|
|
self.__regExpsPO[k] = re.compile(pattern)
|
|
|
|
|
|
|
|
|
|
self.__domTree = BasicTellicoDOM()
|
|
|
|
@ -296,10 +296,10 @@ class MinisterioCulturaParser:
|
|
|
|
|
"""
|
|
|
|
|
Fetch HTML data from url
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
u = urllib2.urlopen(url)
|
|
|
|
|
except Exception, e:
|
|
|
|
|
u = urllib.request.urlopen(url)
|
|
|
|
|
except Exception as e:
|
|
|
|
|
u.close()
|
|
|
|
|
sys.exit("""
|
|
|
|
|
Network error while getting HTML content.
|
|
|
|
@ -312,7 +312,7 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage:
|
|
|
|
|
|
|
|
|
|
def __fetchBookLinks(self):
|
|
|
|
|
"""
|
|
|
|
|
Retrieve all links related to the search. self.__data contains HTML content fetched by self.__getHTMLContent()
|
|
|
|
|
Retrieve all links related to the search. self.__data contains HTML content fetched by self.__getHTMLContent()
|
|
|
|
|
that need to be parsed.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
@ -333,10 +333,10 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage:
|
|
|
|
|
|
|
|
|
|
data['comments'] = []
|
|
|
|
|
# Empty string if series not available
|
|
|
|
|
data['series_num'] = NULLSTRING
|
|
|
|
|
data['series_num'] = NULLSTRING
|
|
|
|
|
data['translator'] = NULLSTRING
|
|
|
|
|
|
|
|
|
|
for name, po in self.__regExpsPO.iteritems():
|
|
|
|
|
for name, po in self.__regExpsPO.items():
|
|
|
|
|
data[name] = NULLSTRING
|
|
|
|
|
matches[name] = re.search(self.__regExps[name], self.__data, re.S | re.I)
|
|
|
|
|
|
|
|
|
@ -391,22 +391,22 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage:
|
|
|
|
|
|
|
|
|
|
elif name == 'cdu':
|
|
|
|
|
data['comments'].append('CDU: ' + matches[name].group('cdu').strip())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
elif name == 'notas':
|
|
|
|
|
data['comments'].append(matches[name].group('notas').strip())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
elif name == 'series':
|
|
|
|
|
d = matches[name].group('series').strip()
|
|
|
|
|
d = re.sub(' ', ' ', d)
|
|
|
|
|
data[name] = d
|
|
|
|
|
# data[name] can contain something like 'Byblos, 162/24'
|
|
|
|
|
|
|
|
|
|
# Maybe better to add the reg exp to get seriesNum in self.__regExps
|
|
|
|
|
# Maybe better to add the reg exp to get seriesNum in self.__regExps
|
|
|
|
|
p = re.compile('[0-9]+$')
|
|
|
|
|
s = re.search(p, data[name])
|
|
|
|
|
|
|
|
|
|
if s:
|
|
|
|
|
# if series ends with a number, it seems that is a
|
|
|
|
|
# if series ends with a number, it seems that is a
|
|
|
|
|
# number of the book inside the series. We save in seriesNum
|
|
|
|
|
data['series_num'] = s.group()
|
|
|
|
|
|
|
|
|
@ -434,7 +434,7 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage:
|
|
|
|
|
# Sometimes, the search engine outputs some image between a elements
|
|
|
|
|
if d.strip()[:4] != '<img':
|
|
|
|
|
data[name].append(d.strip())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Move tr authors (translators) to translators list
|
|
|
|
|
translator = self.__getSpecialRol(data[name], TRANSLATOR_STR)
|
|
|
|
|
edlit = self.__getSpecialRol(data[name], EDLIT_STR)
|
|
|
|
@ -470,12 +470,12 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __getBook(self, data, kind = ISBN):
|
|
|
|
|
if not len(data):
|
|
|
|
|
raise EngineError, "No data given. Unable to proceed."
|
|
|
|
|
if not len(data):
|
|
|
|
|
raise EngineError("No data given. Unable to proceed.")
|
|
|
|
|
|
|
|
|
|
if kind == ISBN:
|
|
|
|
|
self.__getHTMLContent("%s%s%s" % (self.__baseURL, self.__searchURL % \
|
|
|
|
|
(urllib.quote(data), # ISBN
|
|
|
|
|
(urllib.parse.quote(data), # ISBN
|
|
|
|
|
NULLSTRING, # AUTHOR
|
|
|
|
|
NULLSTRING), # TITLE
|
|
|
|
|
self.__suffixURL)
|
|
|
|
@ -483,7 +483,7 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage:
|
|
|
|
|
elif kind == AUTHOR:
|
|
|
|
|
self.__getHTMLContent("%s%s%s" % (self.__baseURL, self.__searchURL % \
|
|
|
|
|
(NULLSTRING, # ISBN
|
|
|
|
|
urllib.quote(data), # AUTHOR
|
|
|
|
|
urllib.parse.quote(data), # AUTHOR
|
|
|
|
|
NULLSTRING), # TITLE
|
|
|
|
|
self.__suffixURL)
|
|
|
|
|
)
|
|
|
|
@ -492,7 +492,7 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage:
|
|
|
|
|
self.__getHTMLContent("%s%s%s" % (self.__baseURL, self.__searchURL % \
|
|
|
|
|
(NULLSTRING, # ISBN
|
|
|
|
|
NULLSTRING, # AUTHOR
|
|
|
|
|
urllib.quote(data)), # TITLE
|
|
|
|
|
urllib.parse.quote(data)), # TITLE
|
|
|
|
|
self.__suffixURL)
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
@ -519,12 +519,12 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage:
|
|
|
|
|
if authors[j] == special:
|
|
|
|
|
special_rol.append(authors[j-1])
|
|
|
|
|
j += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return special_rol
|
|
|
|
|
|
|
|
|
|
def __removeSpecialsFromAuthors(self, authors, specials, string):
|
|
|
|
|
"""
|
|
|
|
|
Receives a list with authors+translators and removes 'tr.' and
|
|
|
|
|
Receives a list with authors+translators and removes 'tr.' and
|
|
|
|
|
authors from there. Example:
|
|
|
|
|
authors: ['Stephen King','Lorenzo Cortina','tr.','Rosalía Vázquez','tr.']
|
|
|
|
|
translators: ['Lorenzo Cortina','Rosalía Vázquez']
|
|
|
|
@ -551,16 +551,16 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage:
|
|
|
|
|
return prefix + string.join(translators, '; ')
|
|
|
|
|
|
|
|
|
|
def halt():
    """
    Print "HALT." to stdout and terminate the process with exit status 0.

    Raises SystemExit (via sys.exit), so it never returns to the caller.
    """
    print("HALT.")
    sys.exit(0)
|
|
|
|
|
|
|
|
|
|
def showUsage():
    """
    Print the command-line usage summary to stdout and exit with status 1.

    The script name shown in the summary is taken from sys.argv[0].
    Raises SystemExit (via sys.exit), so it never returns to the caller.
    """
    # The usage text is kept as a single literal so it is emitted verbatim.
    print("""Usage: %s options
Where options are:
-t title
-i (ISBN|UPC)
-a author
-m filename (support for multiple ISBN/UPC search)""" % sys.argv[0])
    sys.exit(1)
|
|
|
|
|
|
|
|
|
|
def main():
|
|
|
|
@ -573,7 +573,7 @@ def main():
|
|
|
|
|
isbnStringList = NULLSTRING
|
|
|
|
|
|
|
|
|
|
opts = {'-t' : TITLE, '-i' : ISBN, '-a' : AUTHOR, '-m' : isbnStringList}
|
|
|
|
|
if sys.argv[1] not in opts.keys():
|
|
|
|
|
if sys.argv[1] not in list(opts.keys()):
|
|
|
|
|
showUsage()
|
|
|
|
|
|
|
|
|
|
if sys.argv[1] == '-m':
|
|
|
|
@ -584,8 +584,8 @@ def main():
|
|
|
|
|
sys.argv[2] = string.join([d[:-1] for d in data], ';')
|
|
|
|
|
sys.argv[1] = '-i'
|
|
|
|
|
f.close()
|
|
|
|
|
except IOError, e:
|
|
|
|
|
print "Error: %s" % e
|
|
|
|
|
except IOError as e:
|
|
|
|
|
print("Error: %s" % e)
|
|
|
|
|
sys.exit(1)
|
|
|
|
|
|
|
|
|
|
parser = MinisterioCulturaParser()
|
|
|
|
|