From 8e34bd5ac84fe4d2d2bb0fd28979927e054756b3 Mon Sep 17 00:00:00 2001 From: Jonas Zeunert Date: Wed, 24 Apr 2024 00:36:57 +0200 Subject: [PATCH] Add dotfiles and scripts --- dotfiles/awesm.service | 69 ++++++ dotfiles/fscrawler/_default/6/_settings.json | 214 ++++++++++++++++++ .../_default/6/_settings_folder.json | 64 ++++++ dotfiles/fscrawler/_default/7/_settings.json | 214 ++++++++++++++++++ .../_default/7/_settings_folder.json | 64 ++++++ .../_default/7/_wpsearch_settings.json | 80 +++++++ dotfiles/fscrawler/_default/8/_settings.json | 214 ++++++++++++++++++ .../_default/8/_settings_folder.json | 64 ++++++ .../_default/8/_wpsearch_settings.json | 80 +++++++ dotfiles/fscrawler/awesm/_settings.yaml | 34 +++ dotfiles/fscrawler/awesm/_status.json | 6 + dotfiles/fscrawler/awesome/_settings.yaml | 34 +++ dotfiles/fscrawler/awesome/_status.json | 6 + dotfiles/nginx.conf | 52 +++++ scripts/delete_index.sh | 3 + scripts/get_indices.sh | 3 + scripts/update | 18 ++ scripts/update_fscrawler.sh | 3 + 18 files changed, 1222 insertions(+) create mode 100644 dotfiles/awesm.service create mode 100755 dotfiles/fscrawler/_default/6/_settings.json create mode 100755 dotfiles/fscrawler/_default/6/_settings_folder.json create mode 100755 dotfiles/fscrawler/_default/7/_settings.json create mode 100755 dotfiles/fscrawler/_default/7/_settings_folder.json create mode 100755 dotfiles/fscrawler/_default/7/_wpsearch_settings.json create mode 100755 dotfiles/fscrawler/_default/8/_settings.json create mode 100755 dotfiles/fscrawler/_default/8/_settings_folder.json create mode 100755 dotfiles/fscrawler/_default/8/_wpsearch_settings.json create mode 100755 dotfiles/fscrawler/awesm/_settings.yaml create mode 100755 dotfiles/fscrawler/awesm/_status.json create mode 100755 dotfiles/fscrawler/awesome/_settings.yaml create mode 100755 dotfiles/fscrawler/awesome/_status.json create mode 100644 dotfiles/nginx.conf create mode 100755 scripts/delete_index.sh create mode 100755 scripts/get_indices.sh 
create mode 100755 scripts/update create mode 100755 scripts/update_fscrawler.sh diff --git a/dotfiles/awesm.service b/dotfiles/awesm.service new file mode 100644 index 0000000..ba15b9c --- /dev/null +++ b/dotfiles/awesm.service @@ -0,0 +1,69 @@ +[Unit] +Description=Awesome search service +After=network.target + +[Service] +Type=simple +User=awesm +Group=awesm +Environment="RUST_LOG=info" +ExecStart=/var/www/awesm/awesm-server/target/release/awesm +StandardOutput=append:/var/log/awesm/awesm.log +StandardError=inherit + +# Specifies the maximum file descriptor number that can be opened by this process +LimitNOFILE=1000 + +# Specifies the maximum number of processes +LimitNPROC=100 + +# Specifies the maximum size of virtual memory +LimitAS=infinity + +# Specifies the maximum file size +LimitFSIZE=infinity + +# Disable timeout logic and wait until process is stopped +TimeoutStopSec=0 + +KillSignal=SIGTERM + +# Allow a slow startup before the systemd notifier module kicks in to extend the timeout +TimeoutStartSec=900 + +# Sandboxing options to harden security +# Depending on specificities of your service/app, you may need to tweak these +# .. 
but this should be a good baseline +# Details for these options: https://www.freedesktop.org/software/systemd/man/systemd.exec.html +NoNewPrivileges=yes +PrivateTmp=yes +PrivateDevices=yes +RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6 AF_NETLINK +RestrictNamespaces=yes +RestrictRealtime=yes +DevicePolicy=closed +ProtectClock=yes +ProtectHostname=yes +ProtectProc=invisible +ProtectSystem=full +ProtectControlGroups=yes +ProtectKernelModules=yes +ProtectKernelTunables=yes +LockPersonality=yes +SystemCallArchitectures=native +SystemCallFilter=~@clock @debug @module @mount @obsolete @reboot @setuid @swap @cpu-emulation @privileged + +# Denying access to capabilities that should not be relevant for webapps +# Doc: https://man7.org/linux/man-pages/man7/capabilities.7.html +CapabilityBoundingSet=~CAP_SYS_RAWIO CAP_MKNOD +CapabilityBoundingSet=~CAP_AUDIT_CONTROL CAP_AUDIT_READ CAP_AUDIT_WRITE +CapabilityBoundingSet=~CAP_SYS_BOOT CAP_SYS_TIME CAP_SYS_MODULE CAP_SYS_PACCT +CapabilityBoundingSet=~CAP_LEASE CAP_LINUX_IMMUTABLE CAP_IPC_LOCK +CapabilityBoundingSet=~CAP_BLOCK_SUSPEND CAP_WAKE_ALARM +CapabilityBoundingSet=~CAP_SYS_TTY_CONFIG +CapabilityBoundingSet=~CAP_MAC_ADMIN CAP_MAC_OVERRIDE +CapabilityBoundingSet=~CAP_NET_ADMIN CAP_NET_BROADCAST CAP_NET_RAW +CapabilityBoundingSet=~CAP_SYS_ADMIN CAP_SYS_PTRACE CAP_SYSLOG + +[Install] +WantedBy=multi-user.target diff --git a/dotfiles/fscrawler/_default/6/_settings.json b/dotfiles/fscrawler/_default/6/_settings.json new file mode 100755 index 0000000..3fa9992 --- /dev/null +++ b/dotfiles/fscrawler/_default/6/_settings.json @@ -0,0 +1,214 @@ +{ + "settings": { + "number_of_shards": 1, + "index.mapping.total_fields.limit": 2000, + "analysis": { + "analyzer": { + "fscrawler_path": { + "tokenizer": "fscrawler_path" + } + }, + "tokenizer": { + "fscrawler_path": { + "type": "path_hierarchy" + } + } + } + }, + "mappings": { + "dynamic_templates": [ + { + "raw_as_text": { + "path_match": "meta.raw.*", + "mapping": { + "type": "text", + 
"fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + } + ], + "properties": { + "attachment": { + "type": "binary", + "doc_values": false + }, + "attributes": { + "properties": { + "group": { + "type": "keyword" + }, + "owner": { + "type": "keyword" + } + } + }, + "content": { + "type": "text" + }, + "file": { + "properties": { + "content_type": { + "type": "keyword" + }, + "filename": { + "type": "keyword", + "store": true + }, + "extension": { + "type": "keyword" + }, + "filesize": { + "type": "long" + }, + "indexed_chars": { + "type": "long" + }, + "indexing_date": { + "type": "date", + "format": "dateOptionalTime" + }, + "created": { + "type": "date", + "format": "dateOptionalTime" + }, + "last_modified": { + "type": "date", + "format": "dateOptionalTime" + }, + "last_accessed": { + "type": "date", + "format": "dateOptionalTime" + }, + "checksum": { + "type": "keyword" + }, + "url": { + "type": "keyword", + "index": false + } + } + }, + "meta": { + "properties": { + "author": { + "type": "text" + }, + "date": { + "type": "date", + "format": "dateOptionalTime" + }, + "keywords": { + "type": "text" + }, + "title": { + "type": "text" + }, + "language": { + "type": "keyword" + }, + "format": { + "type": "text" + }, + "identifier": { + "type": "text" + }, + "contributor": { + "type": "text" + }, + "coverage": { + "type": "text" + }, + "modifier": { + "type": "text" + }, + "creator_tool": { + "type": "keyword" + }, + "publisher": { + "type": "text" + }, + "relation": { + "type": "text" + }, + "rights": { + "type": "text" + }, + "source": { + "type": "text" + }, + "type": { + "type": "text" + }, + "description": { + "type": "text" + }, + "created": { + "type": "date", + "format": "dateOptionalTime" + }, + "print_date": { + "type": "date", + "format": "dateOptionalTime" + }, + "metadata_date": { + "type": "date", + "format": "dateOptionalTime" + }, + "latitude": { + "type": "text" + }, + "longitude": { + "type": "text" + }, + 
"altitude": { + "type": "text" + }, + "rating": { + "type": "byte" + }, + "comments": { + "type": "text" + } + } + }, + "path": { + "properties": { + "real": { + "type": "keyword", + "fields": { + "tree": { + "type": "text", + "analyzer": "fscrawler_path", + "fielddata": true + }, + "fulltext": { + "type": "text" + } + } + }, + "root": { + "type": "keyword" + }, + "virtual": { + "type": "keyword", + "fields": { + "tree": { + "type": "text", + "analyzer": "fscrawler_path", + "fielddata": true + }, + "fulltext": { + "type": "text" + } + } + } + } + } + } + } +} diff --git a/dotfiles/fscrawler/_default/6/_settings_folder.json b/dotfiles/fscrawler/_default/6/_settings_folder.json new file mode 100755 index 0000000..2ed2abf --- /dev/null +++ b/dotfiles/fscrawler/_default/6/_settings_folder.json @@ -0,0 +1,64 @@ +{ + "settings": { + "analysis": { + "analyzer": { + "fscrawler_path": { + "tokenizer": "fscrawler_path" + } + }, + "tokenizer": { + "fscrawler_path": { + "type": "path_hierarchy" + } + } + } + }, + "mappings": { + "properties" : { + "file": { + "properties": { + "content_type": { + "type": "keyword" + }, + "filename": { + "type": "keyword", + "store": true + } + } + }, + "path": { + "properties": { + "real": { + "type": "keyword", + "fields": { + "tree": { + "type": "text", + "analyzer": "fscrawler_path", + "fielddata": true + }, + "fulltext": { + "type": "text" + } + } + }, + "root": { + "type": "keyword" + }, + "virtual": { + "type": "keyword", + "fields": { + "tree": { + "type": "text", + "analyzer": "fscrawler_path", + "fielddata": true + }, + "fulltext": { + "type": "text" + } + } + } + } + } + } + } +} diff --git a/dotfiles/fscrawler/_default/7/_settings.json b/dotfiles/fscrawler/_default/7/_settings.json new file mode 100755 index 0000000..3fa9992 --- /dev/null +++ b/dotfiles/fscrawler/_default/7/_settings.json @@ -0,0 +1,214 @@ +{ + "settings": { + "number_of_shards": 1, + "index.mapping.total_fields.limit": 2000, + "analysis": { + "analyzer": { + 
"fscrawler_path": { + "tokenizer": "fscrawler_path" + } + }, + "tokenizer": { + "fscrawler_path": { + "type": "path_hierarchy" + } + } + } + }, + "mappings": { + "dynamic_templates": [ + { + "raw_as_text": { + "path_match": "meta.raw.*", + "mapping": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + } + ], + "properties": { + "attachment": { + "type": "binary", + "doc_values": false + }, + "attributes": { + "properties": { + "group": { + "type": "keyword" + }, + "owner": { + "type": "keyword" + } + } + }, + "content": { + "type": "text" + }, + "file": { + "properties": { + "content_type": { + "type": "keyword" + }, + "filename": { + "type": "keyword", + "store": true + }, + "extension": { + "type": "keyword" + }, + "filesize": { + "type": "long" + }, + "indexed_chars": { + "type": "long" + }, + "indexing_date": { + "type": "date", + "format": "dateOptionalTime" + }, + "created": { + "type": "date", + "format": "dateOptionalTime" + }, + "last_modified": { + "type": "date", + "format": "dateOptionalTime" + }, + "last_accessed": { + "type": "date", + "format": "dateOptionalTime" + }, + "checksum": { + "type": "keyword" + }, + "url": { + "type": "keyword", + "index": false + } + } + }, + "meta": { + "properties": { + "author": { + "type": "text" + }, + "date": { + "type": "date", + "format": "dateOptionalTime" + }, + "keywords": { + "type": "text" + }, + "title": { + "type": "text" + }, + "language": { + "type": "keyword" + }, + "format": { + "type": "text" + }, + "identifier": { + "type": "text" + }, + "contributor": { + "type": "text" + }, + "coverage": { + "type": "text" + }, + "modifier": { + "type": "text" + }, + "creator_tool": { + "type": "keyword" + }, + "publisher": { + "type": "text" + }, + "relation": { + "type": "text" + }, + "rights": { + "type": "text" + }, + "source": { + "type": "text" + }, + "type": { + "type": "text" + }, + "description": { + "type": "text" + }, + "created": { + "type": 
"date", + "format": "dateOptionalTime" + }, + "print_date": { + "type": "date", + "format": "dateOptionalTime" + }, + "metadata_date": { + "type": "date", + "format": "dateOptionalTime" + }, + "latitude": { + "type": "text" + }, + "longitude": { + "type": "text" + }, + "altitude": { + "type": "text" + }, + "rating": { + "type": "byte" + }, + "comments": { + "type": "text" + } + } + }, + "path": { + "properties": { + "real": { + "type": "keyword", + "fields": { + "tree": { + "type": "text", + "analyzer": "fscrawler_path", + "fielddata": true + }, + "fulltext": { + "type": "text" + } + } + }, + "root": { + "type": "keyword" + }, + "virtual": { + "type": "keyword", + "fields": { + "tree": { + "type": "text", + "analyzer": "fscrawler_path", + "fielddata": true + }, + "fulltext": { + "type": "text" + } + } + } + } + } + } + } +} diff --git a/dotfiles/fscrawler/_default/7/_settings_folder.json b/dotfiles/fscrawler/_default/7/_settings_folder.json new file mode 100755 index 0000000..2ed2abf --- /dev/null +++ b/dotfiles/fscrawler/_default/7/_settings_folder.json @@ -0,0 +1,64 @@ +{ + "settings": { + "analysis": { + "analyzer": { + "fscrawler_path": { + "tokenizer": "fscrawler_path" + } + }, + "tokenizer": { + "fscrawler_path": { + "type": "path_hierarchy" + } + } + } + }, + "mappings": { + "properties" : { + "file": { + "properties": { + "content_type": { + "type": "keyword" + }, + "filename": { + "type": "keyword", + "store": true + } + } + }, + "path": { + "properties": { + "real": { + "type": "keyword", + "fields": { + "tree": { + "type": "text", + "analyzer": "fscrawler_path", + "fielddata": true + }, + "fulltext": { + "type": "text" + } + } + }, + "root": { + "type": "keyword" + }, + "virtual": { + "type": "keyword", + "fields": { + "tree": { + "type": "text", + "analyzer": "fscrawler_path", + "fielddata": true + }, + "fulltext": { + "type": "text" + } + } + } + } + } + } + } +} diff --git a/dotfiles/fscrawler/_default/7/_wpsearch_settings.json 
b/dotfiles/fscrawler/_default/7/_wpsearch_settings.json new file mode 100755 index 0000000..d942f94 --- /dev/null +++ b/dotfiles/fscrawler/_default/7/_wpsearch_settings.json @@ -0,0 +1,80 @@ +{ + "name": "SOURCE_NAME", + "schema": { + "title": "text", + "name": "text", + "body": "text", + "url": "text", + "author": "text", + "keywords": "text", + "language": "text", + "comments": "text", + "mime_type": "text", + "extension": "text", + "size": "number", + "text_size": "number", + "last_modified": "date", + "created_at": "date", + "path": "text" + }, + "display": { + "title_field": "title", + "subtitle_field": "name", + "description_field": "body", + "url_field": "url", + "media_type_field": "mime_type", + "created_by_field": "author", + "detail_fields": [ + { + "field_name": "author", + "label": "Author" + }, + { + "field_name": "keywords", + "label": "Keywords" + }, + { + "field_name": "language", + "label": "Language" + }, + { + "field_name": "last_modified", + "label": "Last Modification Date" + }, + { + "field_name": "created_at", + "label": "Creation date" + }, + { + "field_name": "comments", + "label": "Comments" + }, + { + "field_name": "mime_type", + "label": "Mime Type" + }, + { + "field_name": "extension", + "label": "Extension" + }, + { + "field_name": "size", + "label": "File size" + }, + { + "field_name": "text_size", + "label": "Extracted text size" + }, + { + "field_name": "path", + "label": "Path" + }, + { + "field_name": "body", + "label": "Content" + } + ], + "color": "#000000" + }, + "is_searchable": true +} diff --git a/dotfiles/fscrawler/_default/8/_settings.json b/dotfiles/fscrawler/_default/8/_settings.json new file mode 100755 index 0000000..46c762e --- /dev/null +++ b/dotfiles/fscrawler/_default/8/_settings.json @@ -0,0 +1,214 @@ +{ + "settings": { + "number_of_shards": 1, + "index.mapping.total_fields.limit": 2000, + "analysis": { + "analyzer": { + "fscrawler_path": { + "tokenizer": "fscrawler_path" + } + }, + "tokenizer": { + 
"fscrawler_path": { + "type": "path_hierarchy" + } + } + } + }, + "mappings": { + "dynamic_templates": [ + { + "raw_as_text": { + "path_match": "meta.raw.*", + "mapping": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + } + ], + "properties": { + "attachment": { + "type": "binary", + "doc_values": false + }, + "attributes": { + "properties": { + "group": { + "type": "keyword" + }, + "owner": { + "type": "keyword" + } + } + }, + "content": { + "type": "text" + }, + "file": { + "properties": { + "content_type": { + "type": "keyword" + }, + "filename": { + "type": "keyword", + "store": true + }, + "extension": { + "type": "keyword" + }, + "filesize": { + "type": "long" + }, + "indexed_chars": { + "type": "long" + }, + "indexing_date": { + "type": "date", + "format": "date_optional_time" + }, + "created": { + "type": "date", + "format": "date_optional_time" + }, + "last_modified": { + "type": "date", + "format": "date_optional_time" + }, + "last_accessed": { + "type": "date", + "format": "date_optional_time" + }, + "checksum": { + "type": "keyword" + }, + "url": { + "type": "keyword", + "index": false + } + } + }, + "meta": { + "properties": { + "author": { + "type": "text" + }, + "date": { + "type": "date", + "format": "date_optional_time" + }, + "keywords": { + "type": "text" + }, + "title": { + "type": "text" + }, + "language": { + "type": "keyword" + }, + "format": { + "type": "text" + }, + "identifier": { + "type": "text" + }, + "contributor": { + "type": "text" + }, + "coverage": { + "type": "text" + }, + "modifier": { + "type": "text" + }, + "creator_tool": { + "type": "keyword" + }, + "publisher": { + "type": "text" + }, + "relation": { + "type": "text" + }, + "rights": { + "type": "text" + }, + "source": { + "type": "text" + }, + "type": { + "type": "text" + }, + "description": { + "type": "text" + }, + "created": { + "type": "date", + "format": "date_optional_time" + }, + "print_date": { + "type": 
"date", + "format": "date_optional_time" + }, + "metadata_date": { + "type": "date", + "format": "date_optional_time" + }, + "latitude": { + "type": "text" + }, + "longitude": { + "type": "text" + }, + "altitude": { + "type": "text" + }, + "rating": { + "type": "byte" + }, + "comments": { + "type": "text" + } + } + }, + "path": { + "properties": { + "real": { + "type": "keyword", + "fields": { + "tree": { + "type": "text", + "analyzer": "fscrawler_path", + "fielddata": true + }, + "fulltext": { + "type": "text" + } + } + }, + "root": { + "type": "keyword" + }, + "virtual": { + "type": "keyword", + "fields": { + "tree": { + "type": "text", + "analyzer": "fscrawler_path", + "fielddata": true + }, + "fulltext": { + "type": "text" + } + } + } + } + } + } + } +} diff --git a/dotfiles/fscrawler/_default/8/_settings_folder.json b/dotfiles/fscrawler/_default/8/_settings_folder.json new file mode 100755 index 0000000..2ed2abf --- /dev/null +++ b/dotfiles/fscrawler/_default/8/_settings_folder.json @@ -0,0 +1,64 @@ +{ + "settings": { + "analysis": { + "analyzer": { + "fscrawler_path": { + "tokenizer": "fscrawler_path" + } + }, + "tokenizer": { + "fscrawler_path": { + "type": "path_hierarchy" + } + } + } + }, + "mappings": { + "properties" : { + "file": { + "properties": { + "content_type": { + "type": "keyword" + }, + "filename": { + "type": "keyword", + "store": true + } + } + }, + "path": { + "properties": { + "real": { + "type": "keyword", + "fields": { + "tree": { + "type": "text", + "analyzer": "fscrawler_path", + "fielddata": true + }, + "fulltext": { + "type": "text" + } + } + }, + "root": { + "type": "keyword" + }, + "virtual": { + "type": "keyword", + "fields": { + "tree": { + "type": "text", + "analyzer": "fscrawler_path", + "fielddata": true + }, + "fulltext": { + "type": "text" + } + } + } + } + } + } + } +} diff --git a/dotfiles/fscrawler/_default/8/_wpsearch_settings.json b/dotfiles/fscrawler/_default/8/_wpsearch_settings.json new file mode 100755 index 
0000000..d942f94 --- /dev/null +++ b/dotfiles/fscrawler/_default/8/_wpsearch_settings.json @@ -0,0 +1,80 @@ +{ + "name": "SOURCE_NAME", + "schema": { + "title": "text", + "name": "text", + "body": "text", + "url": "text", + "author": "text", + "keywords": "text", + "language": "text", + "comments": "text", + "mime_type": "text", + "extension": "text", + "size": "number", + "text_size": "number", + "last_modified": "date", + "created_at": "date", + "path": "text" + }, + "display": { + "title_field": "title", + "subtitle_field": "name", + "description_field": "body", + "url_field": "url", + "media_type_field": "mime_type", + "created_by_field": "author", + "detail_fields": [ + { + "field_name": "author", + "label": "Author" + }, + { + "field_name": "keywords", + "label": "Keywords" + }, + { + "field_name": "language", + "label": "Language" + }, + { + "field_name": "last_modified", + "label": "Last Modification Date" + }, + { + "field_name": "created_at", + "label": "Creation date" + }, + { + "field_name": "comments", + "label": "Comments" + }, + { + "field_name": "mime_type", + "label": "Mime Type" + }, + { + "field_name": "extension", + "label": "Extension" + }, + { + "field_name": "size", + "label": "File size" + }, + { + "field_name": "text_size", + "label": "Extracted text size" + }, + { + "field_name": "path", + "label": "Path" + }, + { + "field_name": "body", + "label": "Content" + } + ], + "color": "#000000" + }, + "is_searchable": true +} diff --git a/dotfiles/fscrawler/awesm/_settings.yaml b/dotfiles/fscrawler/awesm/_settings.yaml new file mode 100755 index 0000000..828d1b1 --- /dev/null +++ b/dotfiles/fscrawler/awesm/_settings.yaml @@ -0,0 +1,34 @@ +--- +name: "awesm" +fs: + url: "/home/awesm/data/awesome-awesomeness/readmes" + update_rate: "15m" + json_support: false + includes: + - "*.md" + excludes: + - "index.md" + filename_as_id: true + add_filesize: true + remove_deleted: false + add_as_inner_object: true + store_source: true + index_content: true + 
attributes_support: false + raw_metadata: false + xml_support: false + index_folders: true + lang_detect: true + continue_on_error: true + ocr: + language: "eng" + enabled: true + pdf_strategy: "ocr_and_text" + follow_symlinks: false +elasticsearch: + nodes: + - url: "http://127.0.0.1:9200" + bulk_size: 100 + flush_interval: "5s" + byte_size: "10mb" + ssl_verification: true diff --git a/dotfiles/fscrawler/awesm/_status.json b/dotfiles/fscrawler/awesm/_status.json new file mode 100755 index 0000000..a3572e2 --- /dev/null +++ b/dotfiles/fscrawler/awesm/_status.json @@ -0,0 +1,6 @@ +{ + "name" : "awesm", + "lastrun" : "2024-04-24T00:23:46.417072", + "indexed" : 2, + "deleted" : 0 +} \ No newline at end of file diff --git a/dotfiles/fscrawler/awesome/_settings.yaml b/dotfiles/fscrawler/awesome/_settings.yaml new file mode 100755 index 0000000..beff8cd --- /dev/null +++ b/dotfiles/fscrawler/awesome/_settings.yaml @@ -0,0 +1,34 @@ +--- +name: "awesome" +fs: + url: "/home/awesm/data/awesome-awesomeness/readmes" + update_rate: "15m" + excludes: + - "*/~*" + includes: + - "*.md" + json_support: false + filename_as_id: false + add_filesize: true + remove_deleted: true + add_as_inner_object: true + store_source: false + index_content: true + attributes_support: false + raw_metadata: false + xml_support: false + index_folders: true + lang_detect: false + continue_on_error: false + ocr: + language: "eng" + enabled: true + pdf_strategy: "ocr_and_text" + follow_symlinks: false +elasticsearch: + nodes: + - url: "http://127.0.0.1:9200" + bulk_size: 100 + flush_interval: "5s" + byte_size: "10mb" + ssl_verification: true diff --git a/dotfiles/fscrawler/awesome/_status.json b/dotfiles/fscrawler/awesome/_status.json new file mode 100755 index 0000000..9251508 --- /dev/null +++ b/dotfiles/fscrawler/awesome/_status.json @@ -0,0 +1,6 @@ +{ + "name" : "awesome", + "lastrun" : "2024-04-22T22:33:26.713382", + "indexed" : 0, + "deleted" : 0 +} \ No newline at end of file diff --git 
a/dotfiles/nginx.conf b/dotfiles/nginx.conf new file mode 100644 index 0000000..a13491a --- /dev/null +++ b/dotfiles/nginx.conf @@ -0,0 +1,52 @@ +# Include before server +#map $term $root { +# 1 /var/www/awesm/terminal; +# default /var/www/awesm/html; +#} +# +#map $http_user_agent $autoindex_type { +# "~*curl*" jsonp; +# default html; +#} + +#sub_path_only rewrite ^/$ / permanent; +location = /lists { + alias $root; + autoindex on; + + autoindex_format jsonp; +} + + +location /stats { + autoindex off; + alias /var/www/awesm/stats; +} + +location /style { + autoindex off; + alias /var/www/awesm/style; +} + +location = / { + root $root; + + try_files /index /index.html; +} + +#location ~* ^/lists { +location / { + root $root; + + try_files $uri $uri.html /redirect?q=$uri; +} + + +location /search { + set $args $args&pretty; + proxy_pass http://[::1]:9876; +} + +location /redirect { + proxy_pass http://[::1]:9876; +} diff --git a/scripts/delete_index.sh b/scripts/delete_index.sh new file mode 100755 index 0000000..8373667 --- /dev/null +++ b/scripts/delete_index.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +curl -XDELETE "http://127.0.0.1:9200/$1" diff --git a/scripts/get_indices.sh b/scripts/get_indices.sh new file mode 100755 index 0000000..a130abd --- /dev/null +++ b/scripts/get_indices.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +curl -XGET "http://127.0.0.1:9200/_cat/indices" diff --git a/scripts/update b/scripts/update new file mode 100755 index 0000000..b71deca --- /dev/null +++ b/scripts/update @@ -0,0 +1,18 @@ +#!/bin/sh + +set -eux +cd data/awesome-awesomeness +git stash && git pull + +~/update_fscrawler.sh + +rm -rf /var/www/awesm/html /var/www/awesm/terminal +mv html terminal /var/www/awesm/ + +cd /var/www/awesm +chown -R :www-data html terminal + +date -ur html > stats/updated.txt +ls html | wc -l > stats/sites.txt + +chmod -R o-rwx ./* diff --git a/scripts/update_fscrawler.sh b/scripts/update_fscrawler.sh new file mode 100755 index 0000000..0b44642 --- /dev/null +++ 
b/scripts/update_fscrawler.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +~/data/fscrawler-distribution-2.10-SNAPSHOT/bin/fscrawler --loop 1 --trace awesm