mirror of
https://github.com/Card-Forge/forge.git
synced 2025-11-17 03:08:02 +00:00
- Simplified Oracle Scraper for magiccards.info
This commit is contained in:
1
.gitattributes
vendored
1
.gitattributes
vendored
@@ -15295,5 +15295,6 @@ tools/formats.txt -text
|
||||
tools/guilds.xlsx -text
|
||||
tools/mtg-data.txt svneol=native#text/plain
|
||||
tools/mtgdata-sets-to-forge.txt svneol=native#text/plain
|
||||
tools/oracleScraper.py -text
|
||||
tools/oracleScript.py svneol=native#text/x-python
|
||||
tools/packdata.xlsx -text
|
||||
|
||||
42
tools/oracleScraper.py
Normal file
42
tools/oracleScraper.py
Normal file
@@ -0,0 +1,42 @@
|
||||
import os
|
||||
import requests
|
||||
|
||||
# Value substituted into the magiccards.info query URL (the "e:<set>/en"
# term) to select which set's spoiler page is downloaded.
setName = 'ths'
# HTML fragment the downloaded page is split on; each piece after the first
# is treated as one card.
nameStart = '<span style="font-size: 1.2em;">'
# HTML fragments that delimit a card's oracle text inside its piece.
oracleStart = '<p class="ctext"><b>'
oracleEnd = '</b></p>'
|
||||
|
||||
def normalizeName(name):
    """Convert a card name into the base name used for its card file.

    The name is lower-cased, commas and apostrophes are removed, and every
    space becomes an underscore. The result is used by the script both as
    the file name (plus ``.txt``) and, via its first character, as the
    sub-folder name.
    """
    lowered = name.lower()
    without_punctuation = lowered.replace(',', '').replace("'", "")
    return without_punctuation.replace(' ', '_')
|
||||
|
||||
def normalizeOracle(oracle):
    """Replace typographic punctuation in oracle text with ASCII.

    An em dash (U+2014) becomes ``-`` and both curly single quotes
    (U+2018 and U+2019) become ``'``. Everything else is returned
    unchanged.
    """
    replacements = (
        (u'\u2014', '-'),
        (u'\u2018', "'"),
        # The right curly quote is the usual typographic apostrophe; map it
        # as well so it does not survive as a non-ASCII character.
        (u'\u2019', "'"),
    )
    for fancy, plain in replacements:
        oracle = oracle.replace(fancy, plain)
    return oracle
|
||||
|
||||
# Download the spoiler page for the configured set and cut it into one piece
# of HTML per card.
r = requests.get('http://magiccards.info/query?v=spoiler&s=issue&q=++e:%s/en' % setName)
spl = r.text.split(nameStart)
spl.pop(0)  # Get rid of all of the html that comes before our first card

# For every card, append its oracle text to the matching card file unless the
# file already has an "Oracle:" line. One status line is printed per card:
#   "+" oracle added, "=" already present, "?" card could not be processed.
# The status strings carry two spaces so the output matches what the former
# Python 2 statement form (print '+ ', norm) produced.
for s in spl:
    # Extract name and oracle from magiccards.info
    name = s[1 + s.find(">"):s.find("</a>")]
    # A double <br> in the page becomes the two-character sequence
    # backslash + "n" (not a real newline), keeping the text on one line.
    oracle = s[len(oracleStart) + s.find(oracleStart):s.find(oracleEnd)].replace('<br><br>', '\\n')
    norm = normalizeName(name)

    if not norm:
        # No name could be extracted; norm[0] below would raise IndexError
        # and abort the whole run, so report the card and move on.
        print('?  ' + norm)
        continue

    # Open relative cardsfolder
    path = os.path.join('..', 'res', 'cardsfolder', norm[0], norm + '.txt')

    try:
        with open(path, 'r') as f:
            hasOracle = any(line.startswith("Oracle:") for line in f)

        if not hasOracle:
            with open(path, "a") as f:
                # Write the line with the "Oracle:" key that the check above
                # looks for, so a second run does not append it again. A
                # single write means a failed encode cannot leave a lone
                # blank line behind.
                f.write('\nOracle:' + normalizeOracle(oracle))
            print('+  ' + norm)
        else:
            print('=  ' + norm)
    except (IOError, OSError, UnicodeError):
        # Card file missing or not readable/writable, or the text could not
        # be encoded/decoded. Best effort: report it and continue.
        print('?  ' + norm)
|
||||
Reference in New Issue
Block a user