"""Append scraped oracle text to card script files for one set.

The script downloads the magiccards.info spoiler page for ``setName``,
cuts it into per-card chunks using literal marker strings, and appends the
oracle text to ``../res/cardsfolder/<first letter>/<normalized name>.txt``
when that file has no line starting with ``Oracle:``.

One status line is printed per card:
    ``+``  text was appended
    ``=``  the file already has an ``Oracle:`` line
    ``?``  the card could not be processed (missing file, bad text, ...)

Provenance: this script was carried inside a patch that creates
``tools/oracleScraper.py`` and adds the line
``tools/oracleScraper.py -text`` to ``.gitattributes``.
"""
import os
import sys

setName = 'ths'

# NOTE(review): in the copy this was recovered from, every marker below is
# empty or whitespace-only, which looks like HTML markup was stripped from
# the string literals.  They are reproduced exactly as found; restore the
# real page fragments before running, otherwise no card can be located.
nameStart = ''          # text that precedes each card entry
nameEnd = ''            # text that follows the card name
oracleStart = '\n\n'    # text that precedes the oracle text
oracleEnd = '\n\n'      # text that follows the oracle text
oracleBreak = '\n\n'    # in-text separator rewritten as a literal "\n"

spoilerUrl = 'http://magiccards.info/query?v=spoiler&s=issue&q=++e:%s/en'


def normalizeName(name):
    """Return the card-file stem for *name*.

    Lower-cases the name, drops commas and apostrophes, and turns spaces
    into underscores.
    """
    return name.lower().replace(',', '').replace("'", '').replace(' ', '_')


def normalizeOracle(oracle):
    """Return *oracle* with typographic punctuation replaced by ASCII.

    The em dash becomes ``-`` and both curly single quotes become ``'``.
    """
    return (oracle.replace(u'\u2014', '-')
                  .replace(u'\u2018', "'")
                  .replace(u'\u2019', "'"))


def extractCards(page, nameOpen=None, nameClose=None,
                 oracleOpen=None, oracleClose=None, lineBreak=None):
    """Split *page* into ``(name, oracle)`` pairs.

    Each marker argument defaults to the matching module-level constant.
    ``oracle`` is ``None`` when the oracle markers are not both present in
    a card's chunk.  Chunks without a usable name are skipped.

    Raises:
        ValueError: if the name-start marker is empty, because the page
            cannot be split on an empty string.
    """
    if nameOpen is None:
        nameOpen = nameStart
    if nameClose is None:
        nameClose = nameEnd
    if oracleOpen is None:
        oracleOpen = oracleStart
    if oracleClose is None:
        oracleClose = oracleEnd
    if lineBreak is None:
        lineBreak = oracleBreak

    if not nameOpen:
        raise ValueError('nameStart marker is empty; cannot split the page')

    cards = []
    # Everything before the first marker is page preamble, not a card.
    for chunk in page.split(nameOpen)[1:]:
        nameStop = chunk.find(nameClose)
        if nameStop == -1:
            continue
        # The name sits right after the first ">" in the chunk.
        name = chunk[1 + chunk.find('>'):nameStop]
        if not name:
            continue

        oracle = None
        textFrom = chunk.find(oracleOpen)
        if textFrom != -1:
            textFrom += len(oracleOpen)
            # Look for the end marker only after the start marker.
            textTo = chunk.find(oracleClose, textFrom)
            if textTo != -1:
                # '\\n' is a backslash followed by "n", not a newline.
                oracle = chunk[textFrom:textTo].replace(lineBreak, '\\n')
        cards.append((name, oracle))
    return cards


def cardPath(norm):
    """Return the relative card-file path for the normalized name *norm*."""
    return os.path.join('..', 'res', 'cardsfolder', norm[0], norm + '.txt')


def appendOracle(path, oracle):
    """Append *oracle* to the file at *path* unless it has an Oracle line.

    Returns ``'+ '`` when text was appended, ``'= '`` when a line starting
    with ``Oracle:`` already exists, and ``'? '`` when the file could not
    be read or written.
    """
    try:
        with open(path, 'r') as f:
            hasOracle = any(line.startswith('Oracle:') for line in f)
        if hasOracle:
            return '= '

        # NOTE(review): the appended text carries no "Oracle:" prefix, so
        # the check above will not recognise it on a later run -- confirm
        # whether the prefix is meant to be written here.
        # Build the text first so a failure cannot leave a lone newline.
        text = '\n' + normalizeOracle(oracle)
        with open(path, 'a') as f:
            f.write(text)
        return '+ '
    except (IOError, OSError, UnicodeError):
        # Best effort: a missing or unwritable card file is only reported.
        return '? '


def main():
    """Fetch the spoiler page for ``setName`` and update the card files."""
    # Imported here so the helpers above stay importable without the
    # third-party package installed.
    import requests

    response = requests.get(spoilerUrl % setName)
    for name, oracle in extractCards(response.text):
        norm = normalizeName(name)
        if not norm or oracle is None:
            status = '? '
        else:
            status = appendOracle(cardPath(norm), oracle)
        # Same bytes as the Python 2 statement ``print status, norm``.
        sys.stdout.write(status + ' ' + norm + '\n')


if __name__ == '__main__':
    main()