mirror of
https://github.com/Card-Forge/forge.git
synced 2025-11-17 03:08:02 +00:00
- Simplified Oracle Scraper for magiccards.info
This commit is contained in:
1
.gitattributes
vendored
1
.gitattributes
vendored
@@ -15295,5 +15295,6 @@ tools/formats.txt -text
|
|||||||
tools/guilds.xlsx -text
|
tools/guilds.xlsx -text
|
||||||
tools/mtg-data.txt svneol=native#text/plain
|
tools/mtg-data.txt svneol=native#text/plain
|
||||||
tools/mtgdata-sets-to-forge.txt svneol=native#text/plain
|
tools/mtgdata-sets-to-forge.txt svneol=native#text/plain
|
||||||
|
tools/oracleScraper.py -text
|
||||||
tools/oracleScript.py svneol=native#text/x-python
|
tools/oracleScript.py svneol=native#text/x-python
|
||||||
tools/packdata.xlsx -text
|
tools/packdata.xlsx -text
|
||||||
|
|||||||
42
tools/oracleScraper.py
Normal file
42
tools/oracleScraper.py
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
import os
|
||||||
|
import requests
|
||||||
|
|
||||||
|
setName = 'ths'
|
||||||
|
nameStart = '<span style="font-size: 1.2em;">'
|
||||||
|
oracleStart = '<p class="ctext"><b>'
|
||||||
|
oracleEnd = '</b></p>'
|
||||||
|
|
||||||
|
def normalizeName(name):
|
||||||
|
return name.lower().replace(',','').replace("'","").replace(' ', '_')
|
||||||
|
|
||||||
|
def normalizeOracle(oracle):
|
||||||
|
return oracle.replace(u'\u2014', '-').replace(u'\u2018', "'")
|
||||||
|
|
||||||
|
r = requests.get('http://magiccards.info/query?v=spoiler&s=issue&q=++e:%s/en' % setName)
|
||||||
|
spl = r.text.split(nameStart)
|
||||||
|
spl.pop(0) # Get rid of all of the html that comes before our first card
|
||||||
|
|
||||||
|
for s in spl:
|
||||||
|
# Extract name and oracle from magiccards.info
|
||||||
|
name = s[1 + s.find(">"):s.find("</a>")]
|
||||||
|
oracle = s[len(oracleStart)+s.find(oracleStart):s.find(oracleEnd)].replace('<br><br>', '\\n')
|
||||||
|
norm = normalizeName(name)
|
||||||
|
# Open relative cardsfolder
|
||||||
|
path = os.path.join('..','res','cardsfolder', norm[0], norm+'.txt')
|
||||||
|
|
||||||
|
hasOracle = False
|
||||||
|
try:
|
||||||
|
with open(path, 'r') as f:
|
||||||
|
for line in f.readlines():
|
||||||
|
hasOracle |= line.startswith("Oracle:")
|
||||||
|
|
||||||
|
if not hasOracle:
|
||||||
|
with open(path, "a") as f:
|
||||||
|
f.write('\n')
|
||||||
|
f.write(normalizeOracle(oracle))
|
||||||
|
print '+ ', norm
|
||||||
|
else:
|
||||||
|
print '= ', norm
|
||||||
|
|
||||||
|
except:
|
||||||
|
print '? ', norm
|
||||||
Reference in New Issue
Block a user