commit 3a0a59c76f7211aa027f68484d675965ab4320ad Author: jude Date: Sat Feb 24 22:06:20 2024 +0000 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2dc53ca --- /dev/null +++ b/.gitignore @@ -0,0 +1,160 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 
+# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+.idea/
diff --git a/fetch_data.py b/fetch_data.py
new file mode 100644
index 0000000..8afe2be
--- /dev/null
+++ b/fetch_data.py
@@ -0,0 +1,105 @@
+import re
+from io import StringIO
+import csv
+import json
+
+import requests
+
+
+def get_megalithic_data(country=1, timeout=30):
+    """Fetch site names and coordinates from megalithic.co.uk.
+
+    Downloads the pipe-delimited CSV map cache for ``country`` and keeps the
+    ``lat``, ``lng`` and ``Name`` columns of every row.
+
+    Args:
+        country: Number substituted into the ``csvmap_country{}.csv`` URL.
+        timeout: Seconds to wait for the server before giving up.
+
+    Returns:
+        A list of ``{'lat': ..., 'lng': ..., 'name': ...}`` dicts holding the
+        values exactly as read from the CSV.
+
+    Raises:
+        Exception: If the response status code is not 200.
+        requests.RequestException: If the request fails or times out.
+        KeyError: If the CSV has no ``lat``, ``lng`` or ``Name`` column.
+    """
+    url = 'https://www.megalithic.co.uk/cache/csvmap_country{}.csv'.format(country)
+
+    # Megalithic doesn't really want people scraping
+    response = requests.get(
+        url,
+        headers={
+            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0'
+        },
+        # Without a timeout requests would wait on a stalled server forever.
+        timeout=timeout,
+    )
+    if response.status_code != 200:
+        raise Exception('Failed to fetch data from Megalithic.UK: {}'.format(response.text))
+
+    reader = csv.DictReader(StringIO(response.text), delimiter='|')
+    return [
+        {'lat': row['lat'], 'lng': row['lng'], 'name': row['Name']}
+        for row in reader
+    ]
+
+
+def get_stone_circles_data(timeout=30):
+    """Fetch site names and coordinates from stone-circles.org.uk.
+
+    Downloads ``coords.js``, cuts the list of ``[...]`` entries out of the
+    ``<name> = [...];`` assignment it is expected to contain, and parses
+    that list as JSON.
+
+    Args:
+        timeout: Seconds to wait for the server before giving up.
+
+    Returns:
+        A list of ``{'lat': ..., 'lng': ..., 'name': ...}`` dicts. ``lat`` and
+        ``lng`` are the first two elements of each entry; ``name`` is taken
+        from the third.
+
+    Raises:
+        Exception: If the response status code is not 200, or if the
+            payload does not have the expected shape.
+        requests.RequestException: If the request fails or times out.
+        json.JSONDecodeError: If the extracted array is not valid JSON.
+    """
+    response = requests.get(
+        'http://www.stone-circles.org.uk/stone/Cluster/Coords/coords.js',
+        timeout=timeout,
+    )
+
+    if response.status_code != 200:
+        raise Exception('Failed to fetch data from stone-circles.org.uk: {}'.format(response.text))
+
+    match = re.match(
+        r'.+ = \[(\[.+]),?];',
+        response.text.replace('\n', '')
+    )
+    if match is None:
+        # Fail with a readable message instead of an AttributeError on None.
+        raise Exception('Unexpected coords.js format from stone-circles.org.uk')
+
+    # Strip every backslash that is not followed by a double quote
+    # (presumably escapes that JSON would reject).
+    content = re.sub(r'\\(?!")', '', match.groups()[0])
+    arr = json.loads('[{}]'.format(content))
+
+    data = []
+    for item in arr:
+        data.append({
+            'lat': item[0],
+            'lng': item[1],
+            # NOTE(review): the greedy leading `.+` leaves only the final
+            # character of item[2] for the group - confirm the pattern.
+            'name': re.match(r'.+(.+)', item[2]).groups()[0],
+        })
+
+    return data
+
+
+if __name__ == '__main__':
+    all_data = get_megalithic_data() + get_stone_circles_data()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..cc8680d
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+requests~=2.31
+pandas~=2.2