megalith-osm/fetch_data.py

import csv
import json
import re
from io import StringIO

import requests


def get_megalithic_data(country=1):
    def megalithic_url(country):
        return 'https://www.megalithic.co.uk/cache/csvmap_country{}.csv'.format(country)

    # Megalithic doesn't really want people scraping
    response = requests.get(
        megalithic_url(country),
        headers={
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0'
        }
    )
    if response.status_code != 200:
        raise Exception('Failed to fetch data from Megalithic.UK: {}'.format(response.text))

    content = StringIO(response.text)
    reader = csv.DictReader(content, delimiter='|')
    data = []
    for row in reader:
        data.append({
            'lat': row['lat'],
            'lng': row['lng'],
            'name': row['Name'],
        })
    return data


def get_stone_circles_data():
    response = requests.get('http://www.stone-circles.org.uk/stone/Cluster/Coords/coords.js')
    if response.status_code != 200:
        raise Exception('Failed to fetch data from stone-circles.org.uk: {}'.format(response.text))

    # The response is a JavaScript assignment of a nested array literal.
    # Extract the array, drop backslashes that aren't escaping a double quote,
    # and parse the result as JSON.
    content = re.match(
        r'.+ = \[(\[.+]),?];',
        response.text.replace('\n', '')
    )
    content = re.sub(r'\\(?!")', '', content.groups()[0])
    arr = json.loads('[{}]'.format(content))
    data = []
    for item in arr:
        data.append({
            'lat': item[0],
            'lng': item[1],
            # The third element is an HTML snippet; the link text is the site name.
            'name': re.match(r'.+<a .+>(.+)</a>', item[2]).groups()[0],
        })
    return data


if __name__ == '__main__':
    all_data = get_megalithic_data() + get_stone_circles_data()