2024-02-24 22:06:20 +00:00
|
|
|
import re
|
|
|
|
from io import StringIO
|
|
|
|
import csv
|
|
|
|
import json
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
|
|
|
|
|
|
|
def get_megalithic_data(country=1, timeout=30):
    """Fetch the site records of one country from megalithic.co.uk.

    Downloads the pipe-delimited CSV map cache for ``country`` and reduces
    every row to the handful of fields this script keeps.

    Args:
        country: Country identifier interpolated into the CSV cache URL.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``
            argument, so a stalled server cannot block the script forever.

    Returns:
        A list of dicts with the keys ``lat``, ``lng``, ``name``, ``type``
        and ``url``. ``lat`` and ``lng`` are the unconverted CSV strings.

    Raises:
        Exception: If the server answers with a status other than 200.
        requests.RequestException: If the request fails or times out.
        KeyError: If a row lacks one of the expected CSV columns.
    """
    url = 'https://www.megalithic.co.uk/cache/csvmap_country{}.csv'.format(country)

    # Megalithic doesn't really want people scraping, so the request is sent
    # with a desktop Firefox User-Agent header.
    response = requests.get(
        url,
        headers={
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0'
        },
        timeout=timeout,
    )
    if response.status_code != 200:
        raise Exception('Failed to fetch data from Megalithic.UK: {}'.format(response.text))

    # The first line of the payload supplies the column names used below.
    reader = csv.DictReader(StringIO(response.text), delimiter='|')
    return [
        {
            'lat': row['lat'],
            'lng': row['lng'],
            'name': row['Name'],
            'type': row['Type'],
            'url': 'https://megalithic.co.uk/article.php?sid={}'.format(row['SID']),
        }
        for row in reader
    ]
|
|
|
|
|
|
|
|
|
|
|
|
def get_stone_circles_data(timeout=30):
    """Fetch and parse the site list published by stone-circles.org.uk.

    The site serves its coordinates as a JavaScript file that assigns a
    nested array literal. This function cuts the array out of the script,
    parses it as JSON and reduces each entry to the fields this script keeps.

    Args:
        timeout: Seconds handed to ``requests.get`` as its ``timeout``
            argument, so a stalled server cannot block the script forever.

    Returns:
        A list of dicts with the keys ``lat``, ``lng``, ``name``, ``type``
        and ``url``.

    Raises:
        Exception: If the server answers with a status other than 200.
        ValueError: If the script, or one of its entries, does not have the
            expected layout (this includes ``json.JSONDecodeError``).
        requests.RequestException: If the request fails or times out.
    """
    response = requests.get(
        'http://www.stone-circles.org.uk/stone/Cluster/Coords/coords.js',
        timeout=timeout,
    )
    if response.status_code != 200:
        raise Exception('Failed to fetch data from stone-circles.org.uk: {}'.format(response.text))

    # Line feeds are removed first because '.' in the pattern does not match
    # them; the capture group is the run of inner arrays of the assignment.
    array_match = re.match(
        r'.+ = \[(\[.+]),?];',
        response.text.replace('\n', '')
    )
    if array_match is None:
        raise ValueError('Unexpected coords.js layout from stone-circles.org.uk')

    # Drop every backslash that is not escaping a double quote
    # (presumably JavaScript escapes that JSON would reject -- TODO confirm).
    content = re.sub(r'\\(?!")', '', array_match.group(1))
    arr = json.loads('[{}]'.format(content))

    data = []
    for item in arr:
        # item[2] is an HTML snippet: a leading <b>...</b> title, a link
        # carrying an href, and trailing text after the final tag.
        snippet = item[2]
        title_match = re.match(r'<b>(.+)</b>', snippet)
        link_match = re.search(r'href=([a-zA-Z.]+)', snippet)
        if title_match is None or link_match is None:
            raise ValueError(
                'Unexpected entry from stone-circles.org.uk: {!r}'.format(snippet)
            )

        data.append({
            'lat': item[0],
            'lng': item[1],
            # Strip any markup nested inside the bold title.
            'name': re.sub(r'<.+?>', '', title_match.group(1)),
            # The greedy pattern removes everything through the last '>',
            # leaving only the text that follows the final tag.
            'type': re.sub(r'.+>', '', snippet.replace('<br>', ' ')),
            'url': 'http://www.stone-circles.org.uk/stone/{}'.format(link_match.group(1)),
        })

    return data
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: collect the records and dump them to data.json.
    # Only the stone-circles.org.uk source is active; the Megalithic source
    # is left commented out.
    records = get_stone_circles_data()  # + get_megalithic_data()

    with open('data.json', 'w') as out_file:
        json.dump(records, out_file)
|