Moved to single file
This commit is contained in:
parent
0a04d8f68f
commit
664c7c46a0
1
.gitignore
vendored
1
.gitignore
vendored
@ -163,3 +163,4 @@ cython_debug/
|
|||||||
data.json
|
data.json
|
||||||
*.osm
|
*.osm
|
||||||
*.gpx
|
*.gpx
|
||||||
|
cache/
|
||||||
|
@ -1,69 +0,0 @@
|
|||||||
import re
|
|
||||||
from io import StringIO
|
|
||||||
import csv
|
|
||||||
import json
|
|
||||||
|
|
||||||
import requests
|
|
||||||
|
|
||||||
|
|
||||||
def get_megalithic_data(country=1):
    """Fetch the Megalithic Portal CSV map export for one country.

    Args:
        country: Value interpolated into the export URL (defaults to 1).

    Returns:
        A list with one dict per CSV row, holding the keys ``lat``, ``lng``,
        ``name``, ``type`` and ``url``.

    Raises:
        Exception: If the server answers with a status other than 200.
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    def megalithic_url(country):
        return 'https://www.megalithic.co.uk/cache/csvmap_country{}.csv'.format(country)

    # Megalithic doesn't really want people scraping
    response = requests.get(
        megalithic_url(country),
        headers={
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0'
        },
        # Without an explicit timeout requests waits indefinitely on a
        # stalled connection.
        timeout=30,
    )
    if response.status_code != 200:
        raise Exception(
            'Failed to fetch data from Megalithic.UK (HTTP {}): {}'.format(
                response.status_code, response.text
            )
        )

    # The export is pipe-delimited with a header row.
    reader = csv.DictReader(StringIO(response.text), delimiter='|')

    return [
        {
            'lat': row['lat'],
            'lng': row['lng'],
            'name': row['Name'],
            'type': row['Type'],
            'url': 'https://megalithic.co.uk/article.php?sid={}'.format(row['SID']),
        }
        for row in reader
    ]
|
|
||||||
|
|
||||||
|
|
||||||
def get_stone_circles_data():
    """Fetch and parse the coordinate list published by stone-circles.org.uk.

    The site serves a JavaScript file assigning a nested array; each entry is
    read as ``[lat, lng, html_snippet]``.

    Returns:
        A list with one dict per entry, holding the keys ``lat``, ``lng``,
        ``name``, ``type`` and ``url``.

    Raises:
        Exception: If the server answers with a status other than 200, or if
            the payload does not contain the expected array assignment.
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    response = requests.get(
        'http://www.stone-circles.org.uk/stone/Cluster/Coords/coords.js',
        # Without an explicit timeout requests waits indefinitely on a
        # stalled connection.
        timeout=30,
    )

    if response.status_code != 200:
        raise Exception(
            'Failed to fetch data from stone-circles.org.uk (HTTP {}): {}'.format(
                response.status_code, response.text
            )
        )

    # Pull the inner "[...], [...]" list out of the "<name> = [ ... ];" statement.
    match = re.match(
        r'.+ = \[(\[.+]),?];',
        response.text.replace('\n', '')
    )
    if match is None:
        # Previously this surfaced as AttributeError on None.groups().
        raise Exception('Unexpected payload format from stone-circles.org.uk')

    # Drop every backslash that is not escaping a double quote
    # (presumably JS-only escapes that json.loads would reject).
    content = re.sub(r'\\(?!")', '', match.groups()[0])
    arr = json.loads('[{}]'.format(content))

    # Compiled once instead of on every iteration.
    bold_re = re.compile(r'<b>(.+)</b>')
    tag_re = re.compile(r'<.+?>')
    up_to_last_tag_re = re.compile(r'.+>')
    href_re = re.compile(r'href=([a-zA-Z.]+)')

    data = []
    for item in arr:
        snippet = item[2]
        data.append({
            'lat': item[0],
            'lng': item[1],
            # Text of the leading <b>...</b> element with nested tags removed.
            'name': tag_re.sub('', bold_re.match(snippet).groups()[0]),
            # Whatever follows the last tag once <br> is turned into a space.
            'type': up_to_last_tag_re.sub('', snippet.replace('<br>', ' ')),
            'url': 'http://www.stone-circles.org.uk/stone/{}'.format(
                href_re.search(snippet).groups()[0]
            ),
        })

    return data
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Only the stone-circles.org.uk source is exported here; the Megalithic
    # source is left switched off, as in the trailing comment below.
    all_data = get_stone_circles_data()  # + get_megalithic_data()

    # Serialise first, then write the finished JSON text in one go.
    serialised = json.dumps(all_data)
    with open('data.json', 'w') as out_file:
        out_file.write(serialised)
|
|
161
gpx.py
Normal file
161
gpx.py
Normal file
@ -0,0 +1,161 @@
|
|||||||
|
import csv
|
||||||
|
import json
|
||||||
|
import argparse
|
||||||
|
from html import escape
|
||||||
|
from pathlib import Path
|
||||||
|
import re
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
# Command-line interface: a minimum quality threshold and a cache-refresh flag.
parser = argparse.ArgumentParser(
    description="Fetch data from megalith sources and produce a GPX file",
    prog="megalithosm",
)
parser.add_argument(
    "-q", "--quality",
    type=int,
    default=5,
    help="Include sites of this quality or higher",
)
# store_true already defaults to False, so no explicit default is needed.
parser.add_argument("-r", "--refetch", action="store_true")

args = parser.parse_args()

# Exclusive upper bound for the range(1, country_range) loops over
# Megalithic country ids.
country_range = 6
|
||||||
|
|
||||||
|
|
||||||
|
def cache_stone_circles():
    """Download the stone-circles.org.uk coordinate script into the cache.

    The response body is written unmodified to ``cache/stone-circles.js``;
    the ``cache`` directory is created when missing.

    Raises:
        Exception: If the server answers with a status other than 200.
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    response = requests.get(
        "http://www.stone-circles.org.uk/stone/Cluster/Coords/coords.js",
        # Without an explicit timeout requests waits indefinitely on a
        # stalled connection.
        timeout=30,
    )
    if response.status_code != 200:
        raise Exception(
            "Failed to fetch data from stone-circles.org.uk (HTTP {}): {}".format(
                response.status_code, response.text
            )
        )

    Path("cache").mkdir(exist_ok=True)
    with open("cache/stone-circles.js", "w") as f:
        f.write(response.text)
|
||||||
|
|
||||||
|
|
||||||
|
def cache_megalithic():
    """Download the Megalithic Portal CSV export for every country id.

    One request is made per id in ``range(1, country_range)`` and each body
    is written unmodified to ``cache/megalithic-<id>.csv``; the ``cache``
    directory is created when missing.

    Raises:
        Exception: If any request answers with a status other than 200.
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    def megalithic_url(country):
        return "https://www.megalithic.co.uk/cache/csvmap_country{}.csv".format(country)

    # Loop-invariant: create the directory once rather than per country.
    Path("cache").mkdir(exist_ok=True)

    for country in range(1, country_range):
        # Megalithic doesn't really want people scraping
        response = requests.get(
            megalithic_url(country),
            headers={
                "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0"
            },
            # Without an explicit timeout requests waits indefinitely on a
            # stalled connection.
            timeout=30,
        )
        if response.status_code != 200:
            raise Exception(
                "Failed to fetch data from Megalithic.UK (HTTP {}): {}".format(
                    response.status_code, response.text
                )
            )

        with open("cache/megalithic-{}.csv".format(country), "w") as f:
            f.write(response.text)
|
||||||
|
|
||||||
|
|
||||||
|
# Re-download a source when asked to, or when its cached copy is absent.
stone_circles_cache = Path("cache/stone-circles.js")
if args.refetch or not stone_circles_cache.exists():
    print("Refreshing stone-circles.org.uk cache")
    cache_stone_circles()

# A single missing per-country file triggers a refresh of the whole set.
if args.refetch or any(
    not Path("cache/megalithic-{}.csv".format(c)).exists()
    for c in range(1, country_range)
):
    print("Refreshing megalithic.co.uk cache")
    cache_megalithic()
|
||||||
|
|
||||||
|
|
||||||
|
# Merge both cached sources into one list of site dicts with the keys
# lat, lng, name, type, url and quality.
print("Post-processing data")
data = []

# Megalithic Portal: one pipe-delimited CSV per country id.
for country in range(1, country_range):
    # newline="" hands newline handling to the csv module, as the csv
    # documentation requires for file objects.
    with open("cache/megalithic-{}.csv".format(country), newline="") as f:
        reader = csv.DictReader(f, delimiter="|")
        for row in reader:
            data.append(
                {
                    "lat": row["lat"],
                    "lng": row["lng"],
                    "name": row["Name"],
                    "type": row["Type"],
                    "url": "https://megalithic.co.uk/article.php?sid={}".format(
                        row["SID"]
                    ),
                    "quality": int(row["Condition"]),
                }
            )

# stone-circles.org.uk: a JavaScript file assigning a nested array whose
# entries are read as [lat, lng, html_snippet].
with open("cache/stone-circles.js") as f:
    content = f.read()

match = re.match(r".+ = \[(\[.+]),?];", content.replace("\n", ""))
if match is None:
    # Previously this surfaced as AttributeError on None.groups().
    raise Exception(
        "Unexpected format in cache/stone-circles.js; try again with --refetch"
    )

# Drop every backslash that is not escaping a double quote
# (presumably JS-only escapes that json.loads would reject).
content = re.sub(r'\\(?!")', "", match.groups()[0])
arr = json.loads("[{}]".format(content))
for item in arr:
    data.append(
        {
            "lat": item[0],
            "lng": item[1],
            # Text of the leading <b>...</b> element with nested tags removed.
            "name": re.sub(
                r"<.+?>", "", re.match(r"<b>(.+)</b>", item[2]).groups()[0]
            ),
            # Whatever follows the last tag once <br> is turned into a space.
            "type": re.sub(r".+>", "", item[2].replace("<br>", " ")),
            "url": "http://www.stone-circles.org.uk/stone/{}".format(
                re.search(r"href=([a-zA-Z.]+)", item[2]).groups()[0]
            ),
            # This source has no rating column; every entry gets a fixed 5.
            "quality": 5,
        }
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Write every retained site as a GPX 1.1 waypoint.
print("Generating GPX")
# The XML declaration below promises UTF-8, so the file must be written as
# UTF-8 regardless of the platform's default encoding.
with open("Megaliths.gpx", "w", encoding="utf-8") as gpx_file:
    gpx_file.write(
        """<?xml version="1.0" encoding="UTF-8"?>
<gpx xmlns="http://www.topografix.com/GPX/1/1"
     version="1.1"
     creator="megalithosm"
     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
     xsi:schemaLocation="http://www.topografix.com/GPX/1/1 http://www.topografix.com/GPX/1/1/gpx.xsd">
    <metadata>
        <name>Megalith sites</name>
        <author>
            <name>Jude Southworth</name>
        </author>
    </metadata>"""
    )

    seen_sites = set()
    for poi in data:
        # Sites count as duplicates when coordinates match and the names
        # are equal ignoring whitespace and case.
        norm_name = re.sub(r"\s", "", poi["name"]).lower()
        site_key = "{},{},{}".format(poi["lat"], poi["lng"], norm_name)
        if site_key in seen_sites:
            print("Omitting duplicate site: {}".format(poi["name"]))
            continue
        if poi["quality"] < args.quality:
            continue
        if poi["type"] in ["Museum", "Modern Stone Circle etc"]:
            print("Omitting uninteresting feature: {}".format(poi["name"]))
            continue

        # Deduplicate entries
        seen_sites.add(site_key)
        name = "{} ({})".format(poi["name"], poi["type"].strip())
        # Name and URL are escaped so that &, < and > cannot break the XML.
        gpx_file.write(
            """
    <wpt lat="{}" lon="{}">
        <name>{}</name>
        <desc>{}</desc>
    </wpt>""".format(
                poi["lat"], poi["lng"], escape(name), escape(poi["url"])
            )
        )
    gpx_file.write("""\n</gpx>""")
|
@ -1,2 +1,3 @@
|
|||||||
requests~=2.31
|
requests~=2.31
|
||||||
pandas~=2.2
|
pandas~=2.2
|
||||||
|
black~=24.2
|
||||||
|
31
to_gpx.py
31
to_gpx.py
@ -1,31 +0,0 @@
|
|||||||
import json
|
|
||||||
from html import escape
|
|
||||||
|
|
||||||
# Convert the scraped site list in data.json into a GPX 1.1 waypoint file.
with open('data.json') as data_file:
    data = json.load(data_file)

# The XML declaration below promises UTF-8, so the file must be written as
# UTF-8 regardless of the platform's default encoding.
with open('Megaliths.gpx', 'w', encoding='utf-8') as gpx_file:
    gpx_file.write('''<?xml version="1.0" encoding="UTF-8"?>
<gpx xmlns="http://www.topografix.com/GPX/1/1"
     version="1.1"
     creator="megalithosm"
     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
     xsi:schemaLocation="http://www.topografix.com/GPX/1/1 http://www.topografix.com/GPX/1/1/gpx.xsd">
    <metadata>
        <name>Megalith sites</name>
        <author>
            <name>Jude Southworth</name>
        </author>
    </metadata>''')

    for poi in data:
        name = '{} ({})'.format(poi['name'], poi['type'].strip())

        # The URL is escaped as well as the name: a raw "&" in a query
        # string would otherwise make the document invalid XML.
        gpx_file.write(
            '''
    <wpt lat="{}" lon="{}">
        <name>{}</name>
        <desc>{}</desc>
    </wpt>'''.format(poi['lat'], poi['lng'], escape(name), escape(poi['url']))
        )
    gpx_file.write('</gpx>')
|
|
Loading…
Reference in New Issue
Block a user