From 181f45d92f01a01e09cbb9e78a17de1a86169333 Mon Sep 17 00:00:00 2001
From: Paul Feitzinger
Date: Fri, 24 Jan 2025 15:31:51 -0500
Subject: [PATCH] add readme and requirements file

---
 README.md        | 12 ++++++++++++
 requirements.txt | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)
 create mode 100644 README.md
 create mode 100644 requirements.txt

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0652c89
--- /dev/null
+++ b/README.md
@@ -0,0 +1,12 @@
+This is a simple web scraper in Python using [scrapy](https://docs.scrapy.org/) that writes all the markdown from https://basement.woodbine.nyc/ to disk.
+
+Appending `/download` to the end of any HedgeDoc page URL returns a text file containing that page's markdown. The scraper starts at the markdown version of the homepage and follows `[text](hyperlink)` style markdown links. If there are wiki pages that are not linked to from anywhere else, this script will not find them.
+
+Run like this:
+
+    $ python -m venv .venv
+    $ source .venv/bin/activate
+    $ pip install -r requirements.txt
+    $ scrapy crawl pages
+
+The markdown output will appear in the `None/basement.woodbine.nyc` directory.
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..cb952cc
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,37 @@
+attrs==24.3.0
+Automat==24.8.1
+certifi==2024.12.14
+cffi==1.17.1
+charset-normalizer==3.4.1
+constantly==23.10.4
+cryptography==44.0.0
+cssselect==1.2.0
+defusedxml==0.7.1
+filelock==3.17.0
+hyperlink==21.0.0
+idna==3.10
+incremental==24.7.2
+itemadapter==0.10.0
+itemloaders==1.3.2
+jmespath==1.0.1
+lxml==5.3.0
+packaging==24.2
+parsel==1.10.0
+Protego==0.4.0
+pyasn1==0.6.1
+pyasn1_modules==0.4.1
+pycparser==2.22
+PyDispatcher==2.0.7
+pyOpenSSL==25.0.0
+queuelib==1.7.0
+requests==2.32.3
+requests-file==2.1.0
+Scrapy==2.12.0
+service-identity==24.2.0
+setuptools==75.8.0
+tldextract==5.1.3
+Twisted==24.11.0
+typing_extensions==4.12.2
+urllib3==2.3.0
+w3lib==2.2.1
+zope.interface==7.2
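
The patch adds only the README and the pinned dependencies; the `pages` spider itself is not part of this diff. For orientation, here is a minimal sketch of what a spider matching the README's description could look like. Everything in it is an assumption: the class name, the output path (the `None/` prefix in the README suggests the real spider builds its path from a settings value), and the handling of linked URLs that may already end in `/download`.

```python
import pathlib
import re
from urllib.parse import urlparse

import scrapy


class PagesSpider(scrapy.Spider):
    """Sketch of a spider matching the README's description (hypothetical)."""

    name = "pages"
    # Start at the markdown version of the homepage.
    start_urls = ["https://basement.woodbine.nyc/download"]

    def parse(self, response):
        # The /download endpoint returns the page's raw markdown; derive a
        # filename from the URL path, falling back to "index" for the homepage.
        slug = urlparse(response.url).path.removesuffix("/download").strip("/")
        out_dir = pathlib.Path("basement.woodbine.nyc")
        out_dir.mkdir(parents=True, exist_ok=True)
        (out_dir / f"{slug.replace('/', '_') or 'index'}.md").write_bytes(response.body)

        # Follow [text](hyperlink) style markdown links to other wiki pages
        # and request their /download versions; Scrapy's built-in dupefilter
        # keeps already-seen pages from being re-fetched.
        for url in re.findall(
            r"\[[^\]]*\]\((https://basement\.woodbine\.nyc/[^)\s]+)\)",
            response.text,
        ):
            yield response.follow(url.rstrip("/") + "/download", callback=self.parse)
```

Assuming this sketch lived in a spider module inside a Scrapy project, `scrapy crawl pages` would pick it up by its `name`; as a standalone file it could also be run with `scrapy runspider pages.py`.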