"""Fetch site coordinates from megalithic.co.uk and stone-circles.org.uk."""

import csv
import json
import re
from io import StringIO

import requests

# Seconds to wait on a server before giving up. requests applies no timeout
# by default, so without this a stalled server would hang the script forever.
_DEFAULT_TIMEOUT = 30

_MEGALITHIC_URL = 'https://www.megalithic.co.uk/cache/csvmap_country{}.csv'
_STONE_CIRCLES_URL = 'http://www.stone-circles.org.uk/stone/Cluster/Coords/coords.js'

# Megalithic doesn't really want people scraping, so present a browser
# User-Agent instead of the default python-requests one.
_MEGALITHIC_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0'
}

# NOTE(review): as written, the greedy leading `.+` leaves only the final
# character for the capture group, so the extracted "name" is a single
# character. The pattern looks like it has lost literal delimiters (possibly
# markup around the name) -- confirm against the real coords.js payload.
_STONE_CIRCLE_NAME = re.compile(r'.+(.+)')


def get_megalithic_data(country=1, timeout=_DEFAULT_TIMEOUT):
    """Download and parse the Megalithic Portal CSV map for one country.

    Args:
        country: Value substituted into the ``csvmap_country{}.csv`` URL.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Returns:
        A list of dicts with keys ``'lat'``, ``'lng'`` and ``'name'``, taken
        from the ``lat``, ``lng`` and ``Name`` columns of the pipe-delimited
        CSV. Values are the raw strings produced by ``csv.DictReader``.

    Raises:
        Exception: If the server responds with a status other than 200.
        KeyError: If a row lacks one of the expected columns.
    """
    response = requests.get(
        _MEGALITHIC_URL.format(country),
        headers=_MEGALITHIC_HEADERS,
        timeout=timeout,
    )
    if response.status_code != 200:
        raise Exception('Failed to fetch data from Megalithic.UK: {}'.format(response.text))

    reader = csv.DictReader(StringIO(response.text), delimiter='|')
    return [
        {
            'lat': row['lat'],
            'lng': row['lng'],
            'name': row['Name'],
        }
        for row in reader
    ]


def get_stone_circles_data(timeout=_DEFAULT_TIMEOUT):
    """Download and parse the coordinate array from stone-circles.org.uk.

    The endpoint serves a JavaScript file; the array literal assigned in it
    is extracted with a regular expression and decoded as JSON.

    Args:
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Returns:
        A list of dicts with keys ``'lat'``, ``'lng'`` and ``'name'``, built
        from elements 0, 1 and 2 of each decoded entry.

    Raises:
        Exception: If the server responds with a status other than 200.
        ValueError: If the script body or an entry's name field does not
            match the expected pattern, or the array is not valid JSON.
    """
    response = requests.get(_STONE_CIRCLES_URL, timeout=timeout)
    if response.status_code != 200:
        raise Exception('Failed to fetch data from stone-circles.org.uk: {}'.format(response.text))

    # Newlines are removed first because `.` does not match them by default.
    array_match = re.match(
        r'.+ = \[(\[.+]),?];',
        response.text.replace('\n', '')
    )
    if array_match is None:
        raise ValueError(
            'Could not locate the coordinate array in the stone-circles.org.uk response'
        )

    # Drop every backslash that is not escaping a double quote; presumably
    # the script uses JavaScript-only escapes that json.loads would reject.
    payload = re.sub(r'\\(?!")', '', array_match.group(1))
    entries = json.loads('[{}]'.format(payload))

    data = []
    for entry in entries:
        name_match = _STONE_CIRCLE_NAME.match(entry[2])
        if name_match is None:
            raise ValueError(
                'Could not extract a name from stone-circles.org.uk entry: {!r}'.format(entry)
            )
        data.append({
            'lat': entry[0],
            'lng': entry[1],
            'name': name_match.group(1),
        })
    return data


if __name__ == '__main__':
    all_data = get_megalithic_data() + get_stone_circles_data()