Drop python2 support.

Signed-off-by: Slávek Banko <slavek.banko@axis.cz>
pull/5/head r14.1.0
Slávek Banko 1 year ago
parent b2ccda8be8
commit c800d114bf
No known key found for this signature in database
GPG Key ID: 608F5293A04BE668

@ -33,7 +33,7 @@ Update (checked) = %{title}
""" """
import sys, os, re, md5, random, string import sys, os, re, md5, random, string
import urllib, urllib2, time, base64 import urllib.request, urllib.parse, urllib.error, time, base64
import xml.dom.minidom import xml.dom.minidom
XML_HEADER = """<?xml version="1.0" encoding="UTF-8"?>""" XML_HEADER = """<?xml version="1.0" encoding="UTF-8"?>"""
@ -45,7 +45,7 @@ VERSION = "0.2"
def genMD5(): def genMD5():
""" """
Generates and returns a random md5 string. Its main purpose is to allow random Generates and returns a random md5 string. Its main purpose is to allow random
image file name generation. image file name generation.
""" """
obj = md5.new() obj = md5.new()
@ -62,7 +62,7 @@ class BasicTellicoDOM:
self.__root = self.__doc.createElement('tellico') self.__root = self.__doc.createElement('tellico')
self.__root.setAttribute('xmlns', 'http://periapsis.org/tellico/') self.__root.setAttribute('xmlns', 'http://periapsis.org/tellico/')
self.__root.setAttribute('syntaxVersion', '9') self.__root.setAttribute('syntaxVersion', '9')
self.__collection = self.__doc.createElement('collection') self.__collection = self.__doc.createElement('collection')
self.__collection.setAttribute('title', 'My Comics') self.__collection.setAttribute('title', 'My Comics')
self.__collection.setAttribute('type', '6') self.__collection.setAttribute('type', '6')
@ -78,7 +78,7 @@ class BasicTellicoDOM:
def addEntry(self, movieData): def addEntry(self, movieData):
""" """
Add a comic entry. Add a comic entry.
Returns an entry node instance Returns an entry node instance
""" """
d = movieData d = movieData
@ -86,7 +86,7 @@ class BasicTellicoDOM:
entryNode.setAttribute('id', str(self.__currentId)) entryNode.setAttribute('id', str(self.__currentId))
titleNode = self.__doc.createElement('title') titleNode = self.__doc.createElement('title')
titleNode.appendChild(self.__doc.createTextNode(unicode(d['title'], 'latin-1').encode('utf-8'))) titleNode.appendChild(self.__doc.createTextNode(str(d['title'], 'latin-1').encode('utf-8')))
yearNode = self.__doc.createElement('pub_year') yearNode = self.__doc.createElement('pub_year')
yearNode.appendChild(self.__doc.createTextNode(d['pub_year'])) yearNode.appendChild(self.__doc.createTextNode(d['pub_year']))
@ -101,25 +101,25 @@ class BasicTellicoDOM:
writersNode = self.__doc.createElement('writers') writersNode = self.__doc.createElement('writers')
for g in d['writer']: for g in d['writer']:
writerNode = self.__doc.createElement('writer') writerNode = self.__doc.createElement('writer')
writerNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8'))) writerNode.appendChild(self.__doc.createTextNode(str(g, 'latin-1').encode('utf-8')))
writersNode.appendChild(writerNode) writersNode.appendChild(writerNode)
genresNode = self.__doc.createElement('genres') genresNode = self.__doc.createElement('genres')
for g in d['genre']: for g in d['genre']:
genreNode = self.__doc.createElement('genre') genreNode = self.__doc.createElement('genre')
genreNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8'))) genreNode.appendChild(self.__doc.createTextNode(str(g, 'latin-1').encode('utf-8')))
genresNode.appendChild(genreNode) genresNode.appendChild(genreNode)
commentsNode = self.__doc.createElement('comments') commentsNode = self.__doc.createElement('comments')
#for g in d['comments']: #for g in d['comments']:
# commentsNode.appendChild(self.__doc.createTextNode(unicode("%s\n\n" % g, 'latin-1').encode('utf-8'))) # commentsNode.appendChild(self.__doc.createTextNode(unicode("%s\n\n" % g, 'latin-1').encode('utf-8')))
commentsData = string.join(d['comments'], '\n\n') commentsData = string.join(d['comments'], '\n\n')
commentsNode.appendChild(self.__doc.createTextNode(unicode(commentsData, 'latin-1').encode('utf-8'))) commentsNode.appendChild(self.__doc.createTextNode(str(commentsData, 'latin-1').encode('utf-8')))
artistsNode = self.__doc.createElement('artists') artistsNode = self.__doc.createElement('artists')
for k, v in d['artist'].iteritems(): for k, v in d['artist'].items():
artistNode = self.__doc.createElement('artist') artistNode = self.__doc.createElement('artist')
artistNode.appendChild(self.__doc.createTextNode(unicode(v, 'latin-1').encode('utf-8'))) artistNode.appendChild(self.__doc.createTextNode(str(v, 'latin-1').encode('utf-8')))
artistsNode.appendChild(artistNode) artistsNode.appendChild(artistNode)
pagesNode = self.__doc.createElement('pages') pagesNode = self.__doc.createElement('pages')
@ -132,13 +132,13 @@ class BasicTellicoDOM:
imageNode = self.__doc.createElement('image') imageNode = self.__doc.createElement('image')
imageNode.setAttribute('format', 'JPEG') imageNode.setAttribute('format', 'JPEG')
imageNode.setAttribute('id', d['image'][0]) imageNode.setAttribute('id', d['image'][0])
imageNode.appendChild(self.__doc.createTextNode(unicode(d['image'][1], 'latin-1').encode('utf-8'))) imageNode.appendChild(self.__doc.createTextNode(str(d['image'][1], 'latin-1').encode('utf-8')))
coverNode = self.__doc.createElement('cover') coverNode = self.__doc.createElement('cover')
coverNode.appendChild(self.__doc.createTextNode(d['image'][0])) coverNode.appendChild(self.__doc.createTextNode(d['image'][0]))
for name in ( 'writersNode', 'genresNode', 'artistsNode', 'pagesNode', 'yearNode', for name in ( 'writersNode', 'genresNode', 'artistsNode', 'pagesNode', 'yearNode',
'titleNode', 'issueNode', 'commentsNode', 'pubNode', 'langNode', 'titleNode', 'issueNode', 'commentsNode', 'pubNode', 'langNode',
'countryNode' ): 'countryNode' ):
entryNode.appendChild(eval(name)) entryNode.appendChild(eval(name))
@ -147,7 +147,7 @@ class BasicTellicoDOM:
self.__images.appendChild(imageNode) self.__images.appendChild(imageNode)
self.__collection.appendChild(entryNode) self.__collection.appendChild(entryNode)
self.__currentId += 1 self.__currentId += 1
return entryNode return entryNode
@ -156,17 +156,17 @@ class BasicTellicoDOM:
Prints entry's XML content to stdout Prints entry's XML content to stdout
""" """
try: try:
print nEntry.toxml() print(nEntry.toxml())
except: except:
print sys.stderr, "Error while outputing XML content from entry to Tellico" print(sys.stderr, "Error while outputing XML content from entry to Tellico")
def printXMLTree(self): def printXMLTree(self):
""" """
Outputs XML content to stdout Outputs XML content to stdout
""" """
self.__collection.appendChild(self.__images) self.__collection.appendChild(self.__images)
print XML_HEADER; print DOCTYPE print(XML_HEADER); print(DOCTYPE)
print self.__root.toxml() print(self.__root.toxml())
class DarkHorseParser: class DarkHorseParser:
@ -189,11 +189,11 @@ class DarkHorseParser:
'colorist' : '<b>Colorist: *</b> *<a.*>(?P<colorist>.*)</a>', 'colorist' : '<b>Colorist: *</b> *<a.*>(?P<colorist>.*)</a>',
'genre' : '<b>Genre: *</b> *<a.*?>(?P<genre>.*?)</a><br>', 'genre' : '<b>Genre: *</b> *<a.*?>(?P<genre>.*?)</a><br>',
'format' : '<b>Format: *</b> *(?P<format>.*?)<br>', 'format' : '<b>Format: *</b> *(?P<format>.*?)<br>',
} }
# Compile patterns objects # Compile patterns objects
self.__regExpsPO = {} self.__regExpsPO = {}
for k, pattern in self.__regExps.iteritems(): for k, pattern in self.__regExps.items():
self.__regExpsPO[k] = re.compile(pattern) self.__regExpsPO[k] = re.compile(pattern)
self.__domTree = BasicTellicoDOM() self.__domTree = BasicTellicoDOM()
@ -211,18 +211,18 @@ class DarkHorseParser:
""" """
Fetch HTML data from url Fetch HTML data from url
""" """
u = urllib2.urlopen(url) u = urllib.request.urlopen(url)
self.__data = u.read() self.__data = u.read()
u.close() u.close()
def __fetchMovieLinks(self): def __fetchMovieLinks(self):
""" """
Retrieve all links related to the search. self.__data contains HTML content fetched by self.__getHTMLContent() Retrieve all links related to the search. self.__data contains HTML content fetched by self.__getHTMLContent()
that need to be parsed. that need to be parsed.
""" """
matchList = re.findall("""<a *href="%s(?P<page>.*?)">(?P<title>.*?)</a>""" % self.__basePath.replace('?', '\?'), self.__data) matchList = re.findall("""<a *href="%s(?P<page>.*?)">(?P<title>.*?)</a>""" % self.__basePath.replace('?', '\?'), self.__data)
if not matchList: return None if not matchList: return None
return matchList return matchList
def __fetchCover(self, path, delete = True): def __fetchCover(self, path, delete = True):
@ -231,7 +231,7 @@ class DarkHorseParser:
The image is deleted if delete is True The image is deleted if delete is True
""" """
md5 = genMD5() md5 = genMD5()
imObj = urllib2.urlopen(path.strip()) imObj = urllib.request.urlopen(path.strip())
img = imObj.read() img = imObj.read()
imObj.close() imObj.close()
imgPath = "/tmp/%s.jpeg" % md5 imgPath = "/tmp/%s.jpeg" % md5
@ -240,7 +240,7 @@ class DarkHorseParser:
f.write(img) f.write(img)
f.close() f.close()
except: except:
print sys.stderr, "Error: could not write image into /tmp" print(sys.stderr, "Error: could not write image into /tmp")
b64data = (md5 + '.jpeg', base64.encodestring(img)) b64data = (md5 + '.jpeg', base64.encodestring(img))
@ -249,7 +249,7 @@ class DarkHorseParser:
try: try:
os.remove(imgPath) os.remove(imgPath)
except: except:
print sys.stderr, "Error: could not delete temporary image /tmp/%s.jpeg" % md5 print(sys.stderr, "Error: could not delete temporary image /tmp/%s.jpeg" % md5)
return b64data return b64data
@ -286,7 +286,7 @@ class DarkHorseParser:
data['image'] = b64img data['image'] = b64img
data['pub_year'] = NULLSTRING data['pub_year'] = NULLSTRING
for name, po in self.__regExpsPO.iteritems(): for name, po in self.__regExpsPO.items():
data[name] = NULLSTRING data[name] = NULLSTRING
if name == 'desc': if name == 'desc':
matches[name] = re.findall(self.__regExps[name], self.__data, re.S | re.I) matches[name] = re.findall(self.__regExps[name], self.__data, re.S | re.I)
@ -363,7 +363,7 @@ class DarkHorseParser:
if not len(title): return if not len(title): return
self.__title = title self.__title = title
self.__getHTMLContent("%s%s" % (self.__baseURL, self.__searchURL % urllib.quote(self.__title))) self.__getHTMLContent("%s%s" % (self.__baseURL, self.__searchURL % urllib.parse.quote(self.__title)))
# Get all links # Get all links
links = self.__fetchMovieLinks() links = self.__fetchMovieLinks()
@ -381,11 +381,11 @@ class DarkHorseParser:
return None return None
def halt(): def halt():
print "HALT." print("HALT.")
sys.exit(0) sys.exit(0)
def showUsage(): def showUsage():
print "Usage: %s comic" % sys.argv[0] print("Usage: %s comic" % sys.argv[0])
sys.exit(1) sys.exit(1)
def main(): def main():

@ -15,20 +15,20 @@
# *************************************************************************** # ***************************************************************************
# Version 0.4: 2007-08-27 # Version 0.4: 2007-08-27
# * Fixed parsing errors: some fields in allocine's HTML pages have changed recently. Multiple actors and genres # * Fixed parsing errors: some fields in allocine's HTML pages have changed recently. Multiple actors and genres
# could not be retrieved. Fixed bad http request error due to some changes in HTML code. # could not be retrieved. Fixed bad http request error due to some changes in HTML code.
# #
# Version 0.3: # Version 0.3:
# * Fixed parsing: some fields in allocine's HTML pages have changed. Movie's image could not be fetched anymore. Fixed. # * Fixed parsing: some fields in allocine's HTML pages have changed. Movie's image could not be fetched anymore. Fixed.
# #
# Version 0.2: # Version 0.2:
# * Fixed parsing: allocine's HTML pages have changed. Movie's image could not be fetched anymore. # * Fixed parsing: allocine's HTML pages have changed. Movie's image could not be fetched anymore.
# #
# Version 0.1: # Version 0.1:
# * Initial release. # * Initial release.
import sys, os, re, md5, random import sys, os, re, md5, random
import urllib, urllib2, time, base64 import urllib.request, urllib.parse, urllib.error, time, base64
import xml.dom.minidom import xml.dom.minidom
XML_HEADER = """<?xml version="1.0" encoding="UTF-8"?>""" XML_HEADER = """<?xml version="1.0" encoding="UTF-8"?>"""
@ -48,16 +48,16 @@ class BasicTellicoDOM:
self.__root = self.__doc.createElement('tellico') self.__root = self.__doc.createElement('tellico')
self.__root.setAttribute('xmlns', 'http://periapsis.org/tellico/') self.__root.setAttribute('xmlns', 'http://periapsis.org/tellico/')
self.__root.setAttribute('syntaxVersion', '9') self.__root.setAttribute('syntaxVersion', '9')
self.__collection = self.__doc.createElement('collection') self.__collection = self.__doc.createElement('collection')
self.__collection.setAttribute('title', 'My Movies') self.__collection.setAttribute('title', 'My Movies')
self.__collection.setAttribute('type', '3') self.__collection.setAttribute('type', '3')
self.__fields = self.__doc.createElement('fields') self.__fields = self.__doc.createElement('fields')
# Add all default (standard) fields # Add all default (standard) fields
self.__dfltField = self.__doc.createElement('field') self.__dfltField = self.__doc.createElement('field')
self.__dfltField.setAttribute('name', '_default') self.__dfltField.setAttribute('name', '_default')
# Add a custom 'Collection' field # Add a custom 'Collection' field
self.__customField = self.__doc.createElement('field') self.__customField = self.__doc.createElement('field')
self.__customField.setAttribute('name', 'titre-original') self.__customField.setAttribute('name', 'titre-original')
@ -67,7 +67,7 @@ class BasicTellicoDOM:
self.__customField.setAttribute('format', '1') self.__customField.setAttribute('format', '1')
self.__customField.setAttribute('type', '1') self.__customField.setAttribute('type', '1')
self.__customField.setAttribute('i18n', 'yes') self.__customField.setAttribute('i18n', 'yes')
self.__fields.appendChild(self.__dfltField) self.__fields.appendChild(self.__dfltField)
self.__fields.appendChild(self.__customField) self.__fields.appendChild(self.__customField)
self.__collection.appendChild(self.__fields) self.__collection.appendChild(self.__fields)
@ -90,23 +90,23 @@ class BasicTellicoDOM:
entryNode.setAttribute('id', str(self.__currentId)) entryNode.setAttribute('id', str(self.__currentId))
titleNode = self.__doc.createElement('title') titleNode = self.__doc.createElement('title')
titleNode.appendChild(self.__doc.createTextNode(unicode(d['title'], 'latin-1').encode('utf-8'))) titleNode.appendChild(self.__doc.createTextNode(str(d['title'], 'latin-1').encode('utf-8')))
otitleNode = self.__doc.createElement('titre-original') otitleNode = self.__doc.createElement('titre-original')
otitleNode.appendChild(self.__doc.createTextNode(unicode(d['otitle'], 'latin-1').encode('utf-8'))) otitleNode.appendChild(self.__doc.createTextNode(str(d['otitle'], 'latin-1').encode('utf-8')))
yearNode = self.__doc.createElement('year') yearNode = self.__doc.createElement('year')
yearNode.appendChild(self.__doc.createTextNode(unicode(d['year'], 'latin-1').encode('utf-8'))) yearNode.appendChild(self.__doc.createTextNode(str(d['year'], 'latin-1').encode('utf-8')))
genresNode = self.__doc.createElement('genres') genresNode = self.__doc.createElement('genres')
for g in d['genres']: for g in d['genres']:
genreNode = self.__doc.createElement('genre') genreNode = self.__doc.createElement('genre')
genreNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8'))) genreNode.appendChild(self.__doc.createTextNode(str(g, 'latin-1').encode('utf-8')))
genresNode.appendChild(genreNode) genresNode.appendChild(genreNode)
natsNode = self.__doc.createElement('nationalitys') natsNode = self.__doc.createElement('nationalitys')
natNode = self.__doc.createElement('nat') natNode = self.__doc.createElement('nat')
natNode.appendChild(self.__doc.createTextNode(unicode(d['nat'], 'latin-1').encode('utf-8'))) natNode.appendChild(self.__doc.createTextNode(str(d['nat'], 'latin-1').encode('utf-8')))
natsNode.appendChild(natNode) natsNode.appendChild(natNode)
castsNode = self.__doc.createElement('casts') castsNode = self.__doc.createElement('casts')
@ -114,7 +114,7 @@ class BasicTellicoDOM:
castNode = self.__doc.createElement('cast') castNode = self.__doc.createElement('cast')
col1Node = self.__doc.createElement('column') col1Node = self.__doc.createElement('column')
col2Node = self.__doc.createElement('column') col2Node = self.__doc.createElement('column')
col1Node.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8'))) col1Node.appendChild(self.__doc.createTextNode(str(g, 'latin-1').encode('utf-8')))
castNode.appendChild(col1Node) castNode.appendChild(col1Node)
castNode.appendChild(col2Node) castNode.appendChild(col2Node)
castsNode.appendChild(castNode) castsNode.appendChild(castNode)
@ -122,17 +122,17 @@ class BasicTellicoDOM:
dirsNode = self.__doc.createElement('directors') dirsNode = self.__doc.createElement('directors')
for g in d['dirs']: for g in d['dirs']:
dirNode = self.__doc.createElement('director') dirNode = self.__doc.createElement('director')
dirNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8'))) dirNode.appendChild(self.__doc.createTextNode(str(g, 'latin-1').encode('utf-8')))
dirsNode.appendChild(dirNode) dirsNode.appendChild(dirNode)
timeNode = self.__doc.createElement('running-time') timeNode = self.__doc.createElement('running-time')
timeNode.appendChild(self.__doc.createTextNode(unicode(d['time'], 'latin-1').encode('utf-8'))) timeNode.appendChild(self.__doc.createTextNode(str(d['time'], 'latin-1').encode('utf-8')))
allocineNode = self.__doc.createElement(unicode('allociné-link', 'latin-1').encode('utf-8')) allocineNode = self.__doc.createElement(str('allociné-link', 'latin-1').encode('utf-8'))
allocineNode.appendChild(self.__doc.createTextNode(unicode(d['allocine'], 'latin-1').encode('utf-8'))) allocineNode.appendChild(self.__doc.createTextNode(str(d['allocine'], 'latin-1').encode('utf-8')))
plotNode = self.__doc.createElement('plot') plotNode = self.__doc.createElement('plot')
plotNode.appendChild(self.__doc.createTextNode(unicode(d['plot'], 'latin-1').encode('utf-8'))) plotNode.appendChild(self.__doc.createTextNode(str(d['plot'], 'latin-1').encode('utf-8')))
if d['image']: if d['image']:
imageNode = self.__doc.createElement('image') imageNode = self.__doc.createElement('image')
@ -140,12 +140,12 @@ class BasicTellicoDOM:
imageNode.setAttribute('id', d['image'][0]) imageNode.setAttribute('id', d['image'][0])
imageNode.setAttribute('width', '120') imageNode.setAttribute('width', '120')
imageNode.setAttribute('height', '160') imageNode.setAttribute('height', '160')
imageNode.appendChild(self.__doc.createTextNode(unicode(d['image'][1], 'latin-1').encode('utf-8'))) imageNode.appendChild(self.__doc.createTextNode(str(d['image'][1], 'latin-1').encode('utf-8')))
coverNode = self.__doc.createElement('cover') coverNode = self.__doc.createElement('cover')
coverNode.appendChild(self.__doc.createTextNode(d['image'][0])) coverNode.appendChild(self.__doc.createTextNode(d['image'][0]))
for name in ( 'titleNode', 'otitleNode', 'yearNode', 'genresNode', 'natsNode', for name in ( 'titleNode', 'otitleNode', 'yearNode', 'genresNode', 'natsNode',
'castsNode', 'dirsNode', 'timeNode', 'allocineNode', 'plotNode' ): 'castsNode', 'dirsNode', 'timeNode', 'allocineNode', 'plotNode' ):
entryNode.appendChild(eval(name)) entryNode.appendChild(eval(name))
@ -154,7 +154,7 @@ class BasicTellicoDOM:
self.__images.appendChild(imageNode) self.__images.appendChild(imageNode)
self.__collection.appendChild(entryNode) self.__collection.appendChild(entryNode)
self.__currentId += 1 self.__currentId += 1
def printXML(self): def printXML(self):
@ -162,8 +162,8 @@ class BasicTellicoDOM:
Outputs XML content to stdout Outputs XML content to stdout
""" """
self.__collection.appendChild(self.__images) self.__collection.appendChild(self.__images)
print XML_HEADER; print DOCTYPE print(XML_HEADER); print(DOCTYPE)
print self.__root.toxml() print(self.__root.toxml())
class AlloCineParser: class AlloCineParser:
@ -185,7 +185,7 @@ class AlloCineParser:
'otitle' : 'Titre original *: *<i>(?P<otitle>.+?)</i>', 'otitle' : 'Titre original *: *<i>(?P<otitle>.+?)</i>',
'plot' : """(?s)<td valign="top" style="padding:10 0 0 0"><div align="justify"><h4> *(?P<plot>.+?) *</h4>""", 'plot' : """(?s)<td valign="top" style="padding:10 0 0 0"><div align="justify"><h4> *(?P<plot>.+?) *</h4>""",
'image' : """<td valign="top" width="120".*?<img src="(?P<image>.+?)" border"""} 'image' : """<td valign="top" width="120".*?<img src="(?P<image>.+?)" border"""}
self.__domTree = BasicTellicoDOM() self.__domTree = BasicTellicoDOM()
@ -203,7 +203,7 @@ class AlloCineParser:
Fetch HTML data from url Fetch HTML data from url
""" """
u = urllib2.urlopen(url) u = urllib.request.urlopen(url)
self.__data = u.read() self.__data = u.read()
u.close() u.close()
@ -224,7 +224,7 @@ class AlloCineParser:
matches = data = {} matches = data = {}
for name, regexp in self.__regExps.iteritems(): for name, regexp in self.__regExps.items():
if name == 'image': if name == 'image':
matches[name] = re.findall(self.__regExps[name], self.__data, re.S | re.I) matches[name] = re.findall(self.__regExps[name], self.__data, re.S | re.I)
else: else:
@ -272,7 +272,7 @@ class AlloCineParser:
elif name == 'image': elif name == 'image':
# Save image to a temporary folder # Save image to a temporary folder
md5 = genMD5() md5 = genMD5()
imObj = urllib2.urlopen(matches[name][0].strip()) imObj = urllib.request.urlopen(matches[name][0].strip())
img = imObj.read() img = imObj.read()
imObj.close() imObj.close()
imgPath = "/tmp/%s.jpeg" % md5 imgPath = "/tmp/%s.jpeg" % md5
@ -303,7 +303,7 @@ class AlloCineParser:
if not len(title): return if not len(title): return
self.__title = title self.__title = title
self.__getHTMLContent(self.__searchURL % urllib.quote(self.__title)) self.__getHTMLContent(self.__searchURL % urllib.parse.quote(self.__title))
# Get all links # Get all links
links = self.__fetchMovieLinks() links = self.__fetchMovieLinks()
@ -321,7 +321,7 @@ class AlloCineParser:
def showUsage(): def showUsage():
print "Usage: %s movietitle" % sys.argv[0] print("Usage: %s movietitle" % sys.argv[0])
sys.exit(1) sys.exit(1)
def main(): def main():

@ -37,10 +37,10 @@ ISBN (checked) = -i %1
UPC (checked) = -i %1 UPC (checked) = -i %1
Update (checked) = %{title} Update (checked) = %{title}
** Please note that this script is also part of the Tellico's distribution. ** Please note that this script is also part of the Tellico's distribution.
** You will always find the latest version in the SVN trunk of Tellico ** You will always find the latest version in the SVN trunk of Tellico
SVN Version: SVN Version:
* Removes translators for Authors List * Removes translators for Authors List
* Adds translators to translator field * Adds translators to translator field
* Change from "Collection" to "Series" * Change from "Collection" to "Series"
@ -85,7 +85,7 @@ Version 0.1:
""" """
import sys, os, re, md5, random, string import sys, os, re, md5, random, string
import urllib, urllib2, time, base64 import urllib.request, urllib.parse, urllib.error, time, base64
import xml.dom.minidom, types import xml.dom.minidom, types
import socket import socket
@ -95,7 +95,7 @@ NULLSTRING = ''
VERSION = "0.3.2" VERSION = "0.3.2"
ISBN, AUTHOR, TITLE = range(3) ISBN, AUTHOR, TITLE = list(range(3))
TRANSLATOR_STR = "tr." TRANSLATOR_STR = "tr."
EDLIT_STR = "ed. lit." EDLIT_STR = "ed. lit."
@ -111,16 +111,16 @@ class BasicTellicoDOM:
self.__root = self.__doc.createElement('tellico') self.__root = self.__doc.createElement('tellico')
self.__root.setAttribute('xmlns', 'http://periapsis.org/tellico/') self.__root.setAttribute('xmlns', 'http://periapsis.org/tellico/')
self.__root.setAttribute('syntaxVersion', '9') self.__root.setAttribute('syntaxVersion', '9')
self.__collection = self.__doc.createElement('collection') self.__collection = self.__doc.createElement('collection')
self.__collection.setAttribute('title', 'My Books') self.__collection.setAttribute('title', 'My Books')
self.__collection.setAttribute('type', '2') self.__collection.setAttribute('type', '2')
self.__fields = self.__doc.createElement('fields') self.__fields = self.__doc.createElement('fields')
# Add all default (standard) fields # Add all default (standard) fields
self.__dfltField = self.__doc.createElement('field') self.__dfltField = self.__doc.createElement('field')
self.__dfltField.setAttribute('name', '_default') self.__dfltField.setAttribute('name', '_default')
# Add a custom 'Collection' field (Left by reference for # Add a custom 'Collection' field (Left by reference for
# the future) # the future)
#self.__customCollectionField = self.__doc.createElement('field') #self.__customCollectionField = self.__doc.createElement('field')
@ -146,18 +146,18 @@ class BasicTellicoDOM:
def addEntry(self, movieData): def addEntry(self, movieData):
""" """
Add a comic entry. Add a comic entry.
Returns an entry node instance Returns an entry node instance
""" """
d = movieData d = movieData
# Convert all strings to UTF-8 # Convert all strings to UTF-8
for i in d.keys(): for i in list(d.keys()):
if type(d[i]) == types.ListType: if type(d[i]) == list:
d[i] = [unicode(d[i][j], 'latin-1').encode('utf-8') for j in range(len(d[i]))] d[i] = [str(d[i][j], 'latin-1').encode('utf-8') for j in range(len(d[i]))]
elif type(d[i]) == types.StringType: elif type(d[i]) == bytes:
d[i] = unicode(d[i], 'latin-1').encode('utf-8') d[i] = str(d[i], 'latin-1').encode('utf-8')
entryNode = self.__doc.createElement('entry') entryNode = self.__doc.createElement('entry')
entryNode.setAttribute('id', str(self.__currentId)) entryNode.setAttribute('id', str(self.__currentId))
@ -213,7 +213,7 @@ class BasicTellicoDOM:
translatorNode = self.__doc.createElement('translator') translatorNode = self.__doc.createElement('translator')
translatorNode.appendChild(self.__doc.createTextNode(d['translator'])) translatorNode.appendChild(self.__doc.createTextNode(d['translator']))
for name in ( 'title', 'year', 'pub', 'langs', 'keyword', 'ed', 'writers', for name in ( 'title', 'year', 'pub', 'langs', 'keyword', 'ed', 'writers',
'comments', 'pages', 'isbn', 'price', 'series', 'seriesNum', 'translator' ): 'comments', 'pages', 'isbn', 'price', 'series', 'seriesNum', 'translator' ):
entryNode.appendChild(eval(name + 'Node')) entryNode.appendChild(eval(name + 'Node'))
@ -228,17 +228,17 @@ class BasicTellicoDOM:
""" """
try: try:
print nEntry.toxml() print(nEntry.toxml())
except: except:
print sys.stderr, "Error while outputing XML content from entry to Tellico" print(sys.stderr, "Error while outputing XML content from entry to Tellico")
def printXMLTree(self): def printXMLTree(self):
""" """
Outputs XML content to stdout Outputs XML content to stdout
""" """
print XML_HEADER; print DOCTYPE print(XML_HEADER); print(DOCTYPE)
print self.__root.toxml() print(self.__root.toxml())
class MinisterioCulturaParser: class MinisterioCulturaParser:
@ -264,11 +264,11 @@ class MinisterioCulturaParser:
'cdu' : '<th scope="row">CDU:.*?<td><span>(?P<cdu>.*?)</span></td>', 'cdu' : '<th scope="row">CDU:.*?<td><span>(?P<cdu>.*?)</span></td>',
'encuadernacion': '<th scope="row">Encuadernaci&oacute;n:.*?<td>.*?<span>(?P<encuadernacion>.*?)</span>', 'encuadernacion': '<th scope="row">Encuadernaci&oacute;n:.*?<td>.*?<span>(?P<encuadernacion>.*?)</span>',
'series' : '<th scope="row">Colecci&oacute;n:.*?<td>.*?<span>(?P<series>.*?)</span>' 'series' : '<th scope="row">Colecci&oacute;n:.*?<td>.*?<span>(?P<series>.*?)</span>'
} }
# Compile patterns objects # Compile patterns objects
self.__regExpsPO = {} self.__regExpsPO = {}
for k, pattern in self.__regExps.iteritems(): for k, pattern in self.__regExps.items():
self.__regExpsPO[k] = re.compile(pattern) self.__regExpsPO[k] = re.compile(pattern)
self.__domTree = BasicTellicoDOM() self.__domTree = BasicTellicoDOM()
@ -296,10 +296,10 @@ class MinisterioCulturaParser:
""" """
Fetch HTML data from url Fetch HTML data from url
""" """
try: try:
u = urllib2.urlopen(url) u = urllib.request.urlopen(url)
except Exception, e: except Exception as e:
u.close() u.close()
sys.exit(""" sys.exit("""
Network error while getting HTML content. Network error while getting HTML content.
@ -312,7 +312,7 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage:
def __fetchBookLinks(self): def __fetchBookLinks(self):
""" """
Retrieve all links related to the search. self.__data contains HTML content fetched by self.__getHTMLContent() Retrieve all links related to the search. self.__data contains HTML content fetched by self.__getHTMLContent()
that need to be parsed. that need to be parsed.
""" """
@ -333,10 +333,10 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage:
data['comments'] = [] data['comments'] = []
# Empty string if series not available # Empty string if series not available
data['series_num'] = NULLSTRING data['series_num'] = NULLSTRING
data['translator'] = NULLSTRING data['translator'] = NULLSTRING
for name, po in self.__regExpsPO.iteritems(): for name, po in self.__regExpsPO.items():
data[name] = NULLSTRING data[name] = NULLSTRING
matches[name] = re.search(self.__regExps[name], self.__data, re.S | re.I) matches[name] = re.search(self.__regExps[name], self.__data, re.S | re.I)
@ -391,22 +391,22 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage:
elif name == 'cdu': elif name == 'cdu':
data['comments'].append('CDU: ' + matches[name].group('cdu').strip()) data['comments'].append('CDU: ' + matches[name].group('cdu').strip())
elif name == 'notas': elif name == 'notas':
data['comments'].append(matches[name].group('notas').strip()) data['comments'].append(matches[name].group('notas').strip())
elif name == 'series': elif name == 'series':
d = matches[name].group('series').strip() d = matches[name].group('series').strip()
d = re.sub('&nbsp;', ' ', d) d = re.sub('&nbsp;', ' ', d)
data[name] = d data[name] = d
# data[name] can contain something like 'Byblos, 162/24' # data[name] can contain something like 'Byblos, 162/24'
# Maybe better to add the reg exp to get seriesNum in self.__regExps # Maybe better to add the reg exp to get seriesNum in self.__regExps
p = re.compile('[0-9]+$') p = re.compile('[0-9]+$')
s = re.search(p, data[name]) s = re.search(p, data[name])
if s: if s:
# if series ends with a number, it seems that is a # if series ends with a number, it seems that is a
# number of the book inside the series. We save in seriesNum # number of the book inside the series. We save in seriesNum
data['series_num'] = s.group() data['series_num'] = s.group()
@ -434,7 +434,7 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage:
# Sometimes, the search engine outputs some image between a elements # Sometimes, the search engine outputs some image between a elements
if d.strip()[:4] != '<img': if d.strip()[:4] != '<img':
data[name].append(d.strip()) data[name].append(d.strip())
# Move tr authors (translators) to translators list # Move tr authors (translators) to translators list
translator = self.__getSpecialRol(data[name], TRANSLATOR_STR) translator = self.__getSpecialRol(data[name], TRANSLATOR_STR)
edlit = self.__getSpecialRol(data[name], EDLIT_STR) edlit = self.__getSpecialRol(data[name], EDLIT_STR)
@ -470,12 +470,12 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage:
def __getBook(self, data, kind = ISBN): def __getBook(self, data, kind = ISBN):
if not len(data): if not len(data):
raise EngineError, "No data given. Unable to proceed." raise EngineError("No data given. Unable to proceed.")
if kind == ISBN: if kind == ISBN:
self.__getHTMLContent("%s%s%s" % (self.__baseURL, self.__searchURL % \ self.__getHTMLContent("%s%s%s" % (self.__baseURL, self.__searchURL % \
(urllib.quote(data), # ISBN (urllib.parse.quote(data), # ISBN
NULLSTRING, # AUTHOR NULLSTRING, # AUTHOR
NULLSTRING), # TITLE NULLSTRING), # TITLE
self.__suffixURL) self.__suffixURL)
@ -483,7 +483,7 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage:
elif kind == AUTHOR: elif kind == AUTHOR:
self.__getHTMLContent("%s%s%s" % (self.__baseURL, self.__searchURL % \ self.__getHTMLContent("%s%s%s" % (self.__baseURL, self.__searchURL % \
(NULLSTRING, # ISBN (NULLSTRING, # ISBN
urllib.quote(data), # AUTHOR urllib.parse.quote(data), # AUTHOR
NULLSTRING), # TITLE NULLSTRING), # TITLE
self.__suffixURL) self.__suffixURL)
) )
@ -492,7 +492,7 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage:
self.__getHTMLContent("%s%s%s" % (self.__baseURL, self.__searchURL % \ self.__getHTMLContent("%s%s%s" % (self.__baseURL, self.__searchURL % \
(NULLSTRING, # ISBN (NULLSTRING, # ISBN
NULLSTRING, # AUTHOR NULLSTRING, # AUTHOR
urllib.quote(data)), # TITLE urllib.parse.quote(data)), # TITLE
self.__suffixURL) self.__suffixURL)
) )
@ -519,12 +519,12 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage:
if authors[j] == special: if authors[j] == special:
special_rol.append(authors[j-1]) special_rol.append(authors[j-1])
j += 1 j += 1
return special_rol return special_rol
def __removeSpecialsFromAuthors(self, authors, specials, string): def __removeSpecialsFromAuthors(self, authors, specials, string):
""" """
Receives a list with authors+translators and removes 'tr.' and Receives a list with authors+translators and removes 'tr.' and
authors from there. Example: authors from there. Example:
authors: ['Stephen King','Lorenzo Cortina','tr.','Rosalía Vázquez','tr.'] authors: ['Stephen King','Lorenzo Cortina','tr.','Rosalía Vázquez','tr.']
translators: ['Lorenzo Cortina','Rosalía Vázquez'] translators: ['Lorenzo Cortina','Rosalía Vázquez']
@ -551,16 +551,16 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage:
return prefix + string.join(translators, '; ') return prefix + string.join(translators, '; ')
def halt():
    """Print a "HALT." marker on stdout and terminate with exit status 0."""
    print("HALT.")
    sys.exit(0)
def showUsage():
    """Print the command-line usage summary and terminate with exit status 1.

    The program name shown in the message is taken from sys.argv[0].
    """
    print("""Usage: %s options
Where options are:
-t title
-i (ISBN|UPC)
-a author
-m filename (support for multiple ISBN/UPC search)""" % sys.argv[0])
    sys.exit(1)
def main(): def main():
@ -573,7 +573,7 @@ def main():
isbnStringList = NULLSTRING isbnStringList = NULLSTRING
opts = {'-t' : TITLE, '-i' : ISBN, '-a' : AUTHOR, '-m' : isbnStringList} opts = {'-t' : TITLE, '-i' : ISBN, '-a' : AUTHOR, '-m' : isbnStringList}
if sys.argv[1] not in opts.keys(): if sys.argv[1] not in list(opts.keys()):
showUsage() showUsage()
if sys.argv[1] == '-m': if sys.argv[1] == '-m':
@ -584,8 +584,8 @@ def main():
sys.argv[2] = string.join([d[:-1] for d in data], ';') sys.argv[2] = string.join([d[:-1] for d in data], ';')
sys.argv[1] = '-i' sys.argv[1] = '-i'
f.close() f.close()
except IOError, e: except IOError as e:
print "Error: %s" % e print("Error: %s" % e)
sys.exit(1) sys.exit(1)
parser = MinisterioCulturaParser() parser = MinisterioCulturaParser()

@ -21,7 +21,7 @@ import xml.dom.minidom
# The sqlite3 module is mandatory for this script; abort early with a clear
# message when it cannot be imported.
try:
    import sqlite3
except ImportError:
    # Write the diagnostic to stderr. Passing sys.stderr as a positional
    # argument would only print the stream object's repr on stdout.
    print("The Python sqlite3 module is required to import Griffith databases.",
          file=sys.stderr)
    # sys.exit is always available; the bare exit() helper is injected by
    # the site module and may be missing (e.g. when run with -S).
    sys.exit(1)
DB_PATH = os.environ['HOME'] + '/.griffith/griffith.db' DB_PATH = os.environ['HOME'] + '/.griffith/griffith.db'
@ -36,7 +36,7 @@ class BasicTellicoDOM:
self.__root = self.__doc.createElement('tellico') self.__root = self.__doc.createElement('tellico')
self.__root.setAttribute('xmlns', 'http://periapsis.org/tellico/') self.__root.setAttribute('xmlns', 'http://periapsis.org/tellico/')
self.__root.setAttribute('syntaxVersion', '9') self.__root.setAttribute('syntaxVersion', '9')
self.__collection = self.__doc.createElement('collection') self.__collection = self.__doc.createElement('collection')
self.__collection.setAttribute('title', 'Griffith Import') self.__collection.setAttribute('title', 'Griffith Import')
self.__collection.setAttribute('type', '3') self.__collection.setAttribute('type', '3')
@ -45,7 +45,7 @@ class BasicTellicoDOM:
# Add all default (standard) fields # Add all default (standard) fields
self.__dfltField = self.__doc.createElement('field') self.__dfltField = self.__doc.createElement('field')
self.__dfltField.setAttribute('name', '_default') self.__dfltField.setAttribute('name', '_default')
# change the rating to have a maximum of 10 # change the rating to have a maximum of 10
self.__ratingField = self.__doc.createElement('field') self.__ratingField = self.__doc.createElement('field')
self.__ratingField.setAttribute('name', 'rating') self.__ratingField.setAttribute('name', 'rating')
@ -63,7 +63,7 @@ class BasicTellicoDOM:
propNode.setAttribute('name', 'minimum') propNode.setAttribute('name', 'minimum')
propNode.appendChild(self.__doc.createTextNode('1')) propNode.appendChild(self.__doc.createTextNode('1'))
self.__ratingField.appendChild(propNode); self.__ratingField.appendChild(propNode);
# Add a custom 'Original Title' field # Add a custom 'Original Title' field
self.__titleField = self.__doc.createElement('field') self.__titleField = self.__doc.createElement('field')
self.__titleField.setAttribute('name', 'orig-title') self.__titleField.setAttribute('name', 'orig-title')
@ -73,7 +73,7 @@ class BasicTellicoDOM:
self.__titleField.setAttribute('format', '1') self.__titleField.setAttribute('format', '1')
self.__titleField.setAttribute('type', '1') self.__titleField.setAttribute('type', '1')
self.__titleField.setAttribute('i18n', 'yes') self.__titleField.setAttribute('i18n', 'yes')
self.__keywordField = self.__doc.createElement('field') self.__keywordField = self.__doc.createElement('field')
self.__keywordField.setAttribute('name', 'keyword') self.__keywordField.setAttribute('name', 'keyword')
self.__keywordField.setAttribute('title', 'Keywords') self.__keywordField.setAttribute('title', 'Keywords')
@ -98,13 +98,13 @@ class BasicTellicoDOM:
self.__fields.appendChild(self.__keywordField) self.__fields.appendChild(self.__keywordField)
self.__fields.appendChild(self.__urlField) self.__fields.appendChild(self.__urlField)
self.__collection.appendChild(self.__fields) self.__collection.appendChild(self.__fields)
self.__images = self.__doc.createElement('images') self.__images = self.__doc.createElement('images')
self.__root.appendChild(self.__collection) self.__root.appendChild(self.__collection)
self.__doc.appendChild(self.__root) self.__doc.appendChild(self.__root)
self.__fieldsMap = dict(country='nationality', self.__fieldsMap = dict(country='nationality',
classification='certification', classification='certification',
runtime='running-time', runtime='running-time',
o_title='orig-title', o_title='orig-title',
notes='comments', notes='comments',
@ -121,7 +121,7 @@ class BasicTellicoDOM:
# make sure unique # make sure unique
set = {} set = {}
media = [set.setdefault(e,e) for e in orig_media if e not in set] media = [set.setdefault(e,e) for e in orig_media if e not in set]
mediaField = self.__doc.createElement('field') mediaField = self.__doc.createElement('field')
mediaField.setAttribute('name', 'medium') mediaField.setAttribute('name', 'medium')
mediaField.setAttribute('title', 'Medium') mediaField.setAttribute('title', 'Medium')
@ -140,17 +140,17 @@ class BasicTellicoDOM:
entryNode = self.__doc.createElement('entry') entryNode = self.__doc.createElement('entry')
entryNode.setAttribute('id', movieData['id']) entryNode.setAttribute('id', movieData['id'])
for key, values in movieData.iteritems(): for key, values in movieData.items():
if key == 'id': if key == 'id':
continue continue
if self.__fieldsMap.has_key(key): if key in self.__fieldsMap:
field = self.__fieldsMap[key] field = self.__fieldsMap[key]
else: else:
field = key field = key
parentNode = self.__doc.createElement(field + 's') parentNode = self.__doc.createElement(field + 's')
for value in values: for value in values:
if len(value) == 0: continue if len(value) == 0: continue
node = self.__doc.createElement(field) node = self.__doc.createElement(field)
@ -163,18 +163,18 @@ class BasicTellicoDOM:
imageNode.appendChild(self.__doc.createTextNode(value[1])) imageNode.appendChild(self.__doc.createTextNode(value[1]))
self.__images.appendChild(imageNode) self.__images.appendChild(imageNode)
value = value[0] # value was (id, md5) value = value[0] # value was (id, md5)
if field == 'cast': if field == 'cast':
for v in value: for v in value:
columnNode = self.__doc.createElement('column') columnNode = self.__doc.createElement('column')
columnNode.appendChild(self.__doc.createTextNode(v.strip())) columnNode.appendChild(self.__doc.createTextNode(v.strip()))
node.appendChild(columnNode) node.appendChild(columnNode)
else: else:
node.appendChild(self.__doc.createTextNode(value.strip())) node.appendChild(self.__doc.createTextNode(value.strip()))
if node.hasChildNodes(): parentNode.appendChild(node) if node.hasChildNodes(): parentNode.appendChild(node)
if parentNode.hasChildNodes(): entryNode.appendChild(parentNode) if parentNode.hasChildNodes(): entryNode.appendChild(parentNode)
self.__collection.appendChild(entryNode) self.__collection.appendChild(entryNode)
@ -184,8 +184,8 @@ class BasicTellicoDOM:
Outputs XML content to stdout Outputs XML content to stdout
""" """
self.__collection.appendChild(self.__images) self.__collection.appendChild(self.__images)
print XML_HEADER; print DOCTYPE print(XML_HEADER); print(DOCTYPE)
print self.__root.toxml() print(self.__root.toxml())
class GriffithParser: class GriffithParser:
@ -210,7 +210,7 @@ class GriffithParser:
media = list([row[0].encode('utf-8') for row in c.fetchall()]) media = list([row[0].encode('utf-8') for row in c.fetchall()])
self.__domTree.addMedia(media) self.__domTree.addMedia(media)
def __fetchMovieIds(self): def __fetchMovieIds(self):
""" """
Retrieve all movie ids Retrieve all movie ids
@ -230,7 +230,7 @@ class GriffithParser:
'country','genre','classification','plot', 'country','genre','classification','plot',
'runtime','o_title','studio','notes','image', 'runtime','o_title','studio','notes','image',
'[cast]','loaned','color','site') '[cast]','loaned','color','site')
c = self.__conn.cursor() c = self.__conn.cursor()
c.execute("SELECT %s FROM movies WHERE movie_id=%s" % (','.join(columns),id)) c.execute("SELECT %s FROM movies WHERE movie_id=%s" % (','.join(columns),id))
row = c.fetchone() row = c.fetchone()
@ -240,14 +240,14 @@ class GriffithParser:
for i in range(len(columns)): for i in range(len(columns)):
if row[i] == None : continue if row[i] == None : continue
try: try:
value = row[i].encode('utf-8') value = row[i].encode('utf-8')
except: except:
value = str(row[i]) value = str(row[i])
col = columns[i].replace('[','').replace(']','') col = columns[i].replace('[','').replace(']','')
if col == 'genre' or col == 'studio': if col == 'genre' or col == 'studio':
values = value.split('/') values = value.split('/')
elif col == 'plot' or col == 'notes': elif col == 'plot' or col == 'notes':
@ -280,19 +280,19 @@ class GriffithParser:
media = list([row[0].encode('utf-8') for row in c.fetchall()]) media = list([row[0].encode('utf-8') for row in c.fetchall()])
if len(media) > 0: data['medium'] = media if len(media) > 0: data['medium'] = media
# get all tags # get all tags
c.execute("SELECT name FROM tags WHERE tag_id IN (SELECT tag_id FROM movie_tag WHERE movie_id=%s)" % id) c.execute("SELECT name FROM tags WHERE tag_id IN (SELECT tag_id FROM movie_tag WHERE movie_id=%s)" % id)
tags = list([row[0].encode('utf-8') for row in c.fetchall()]) tags = list([row[0].encode('utf-8') for row in c.fetchall()])
if len(tags) > 0: data['tag'] = tags if len(tags) > 0: data['tag'] = tags
# get all languages # get all languages
c.execute("SELECT name FROM languages WHERE lang_id IN (SELECT lang_id FROM movie_lang WHERE movie_id=%s)" % id) c.execute("SELECT name FROM languages WHERE lang_id IN (SELECT lang_id FROM movie_lang WHERE movie_id=%s)" % id)
langs = list([row[0].encode('utf-8') for row in c.fetchall()]) langs = list([row[0].encode('utf-8') for row in c.fetchall()])
if len(langs) > 0: data['language'] = langs if len(langs) > 0: data['language'] = langs
return data return data

Loading…
Cancel
Save