Skip to content

⚓ Filter | Parse 🔰 #8238

⚓ Filter | Parse 🔰

⚓ Filter | Parse 🔰 #8238

Workflow file for this run

name: ⚓ Filter | Parse 🔰
# This will likely increase as Repo Size gets bigger over time
# MAX_RUNTIME: 30-45 Mins
# NOTE(review): the parent keys of the entries below (trigger, env, job and
# permissions mappings) are not visible in this view -- confirm the nesting
# against the real workflow file.
workflow_dispatch: # Whenever manually run from actions-tab
#- cron: "45 23 * * *" #( 11:45 PM UTC --> 05:30 AM Morning )
- cron: "45 * * * *" #( At minute 45 of every hour, i.e. hourly -- NOT every 45 minutes )
# Values taken from repository secrets
RCLONE_CF_R2_PUB: "${{ secrets.RCLONE_CF_R2_PUB }}"
SSH_META: "${{ secrets.SF_SSH_HOSTS }}"
runs-on: ubuntu-latest
timeout-minutes: 45 # Can't afford to exhaust the 2000 Minutes quota
contents: write
- name: Debloat Runner
  continue-on-error: true
  run: |
    # Trace every command; keep going when a command fails
    set -x
    set +e
    # Download a script (URL elided in this view) and run it with bash
    bash <(curl -qfsSL "")
- name: Checkout repository
  uses: actions/checkout@v4
  # Action inputs have to be nested under `with:`; placed directly on the
  # step they are not valid step keys.
  with:
    # Check out into $GITHUB_WORKSPACE/main (later steps use that path)
    path: "main"
    # Partial clone filter passed to git
    filter: "blob:none"
- name: Setup Env
  run: |
    # Presets (tracing stays off: a secret is written to disk below)
    set +x ; set +e
    sudo apt update -y
    sudo apt install bc coreutils curl dos2unix fdupes jq moreutils wget -y
    sudo apt-get install apt-transport-https apt-utils ca-certificates coreutils dos2unix gnupg2 jq moreutils p7zip-full rename rsync software-properties-common texinfo tmux util-linux wget -y 2>/dev/null
    sudo apt-get update -y 2>/dev/null
    ##Setup rClone
    mkdir -p "$HOME/.config/rclone"
    echo "${{ secrets.RCLONE_CF_R2_PUB }}" > "$HOME/.config/rclone/rclone.conf"
    # Fetch the User-Agent string used by later curl/rclone calls.
    USER_AGENT="$(curl -qfsSL '')"
    export USER_AGENT
    # `export` only lasts for this step's shell. Later steps read
    # $USER_AGENT, so it is persisted through the GITHUB_ENV file.
    echo "USER_AGENT=$USER_AGENT" >> "$GITHUB_ENV"
  continue-on-error: true
- name: Install eget
  run: |
    # Presets
    set -x ; set +e
    # eget for bins
    # This step is not allowed to fail silently (continue-on-error: false),
    # but with `set +e` the trailing chmod would mask a failed download:
    # `wget -O` creates the output file even when nothing was fetched.
    # Check both the exit status and that the file is non-empty.
    if ! sudo wget "" -O "/usr/local/bin/eget" || [ ! -s "/usr/local/bin/eget" ]; then
      echo "[-] Failed to download eget" >&2
      exit 1
    fi
    sudo chmod +xwr "/usr/local/bin/eget"
  continue-on-error: false
# Installs command-line tools with eget. Every call passes a source argument
# (empty in this view) and an explicit destination path via --to. Some of the
# installed tools (7z, anew-rs, rclone) are invoked by later steps.
# Failures do not stop the job (set +e, continue-on-error: true).
- name: Install Deps
run: |
# Presets
set -x ; set +e
# 7z is installed to both /usr/local/bin and /usr/bin
sudo eget "" --to "/usr/local/bin/7z"
sudo eget "" --to "/usr/bin/7z"
sudo eget "" --to "/usr/local/bin/anew"
sudo eget "" --to "/usr/local/bin/anew-rs"
sudo eget "" --to "/usr/local/bin/asn"
sudo eget "" --to "/usr/local/bin/asnmap"
sudo eget "" --to "/usr/local/bin/csvtk"
sudo eget "" --to "/usr/local/bin/cut-cdn"
sudo eget "" --to "/usr/local/bin/dasel"
#dirstat-rs for tree
sudo eget "" --to "/usr/local/bin/ds"
sudo eget "" --to "/usr/local/bin/dnsx"
sudo eget "" --to "/usr/local/bin/httpx"
sudo eget "" --to "/usr/local/bin/inscope"
# jq is installed to both /usr/local/bin and /usr/bin
sudo eget "" --to "/usr/local/bin/jq"
sudo eget "" --to "/usr/bin/jq"
sudo eget "" --to "/usr/local/bin/mapcidr"
sudo eget "" --to "/usr/local/bin/scopegen"
sudo eget "" --to "/usr/local/bin/scopeview"
sudo eget "" --to "/usr/local/bin/spk"
sudo eget "" --to "/usr/local/bin/rclone"
sudo eget "" --to "/usr/local/bin/yq"
continue-on-error: true
- name: Archive Stales
  continue-on-error: true
  run: |
    # Presets
    set -x
    set +e
    # Make sure the archive directory exists, then work from inside it
    mkdir -p "$GITHUB_WORKSPACE/main/Raw/Archive"
    cd "$GITHUB_WORKSPACE/main/Raw/Archive"
    #Remove +7 days
    #find "$GITHUB_WORKSPACE/main/Raw/Archive" -name "*.7z" -type f -mtime +8 -exec rm {} \;
    # Write the date stamps (Asia/Kathmandu) for today and the 7 days before it
    for i in {0..7}; do
      TZ='Asia/Kathmandu' date -d "$i days ago" +'%Y_%m_%d'
    done > "/tmp/7day_range.txt"
    # Remove archived files whose name matches none of those date stamps
    find "$GITHUB_WORKSPACE/main/Raw/Archive" -type f -exec basename {} \; |
      grep -v -f "/tmp/7day_range.txt" |
      xargs -I {} echo ./{} |
      sort -u |
      xargs -I {} rm {} -rf
    # Move every file in Raw/Latest that does not carry today's date stamp
    # into the archive
    find "$GITHUB_WORKSPACE/main/Raw/Latest" -maxdepth 1 -type f \
      ! -name "*$(date +'%Y_%m_%d')*" \
      -exec mv {} "$GITHUB_WORKSPACE/main/Raw/Archive" \;
    #find "$GITHUB_WORKSPACE/main/Raw/Latest" -name "*.7z" -type f -mmin +1500 -exec mv {} "$GITHUB_WORKSPACE/main/Raw/Archive" \;
    # Refresh the modification time of all archived .7z files
    find "$GITHUB_WORKSPACE/main/Raw/Archive" -name "*.7z" \
      -exec touch {} \;
- name: Parse New .7z
  run: |
    # Presets
    set -x ; set +e
    #chdir to /tmp
    mkdir -p "/tmp/Unzipped" ; cd "/tmp/Unzipped"
    #Copy .7z (variable quoted against word splitting; the glob stays outside the quotes)
    cp "$GITHUB_WORKSPACE"/main/Raw/Latest/*.7z /tmp/Unzipped
    # Extract every archive into a sibling directory named after it.
    # The path is handed to sh as "$1" instead of being spliced into the
    # script text, so unusual file names cannot break or inject into it.
    find "/tmp/Unzipped" -iname "certstream*.7z" -exec sh -c '7z x "$1" -o"$(dirname "$1")/$(basename "$1" .7z)"' sh {} \;
    #Del .7z
    find "/tmp/Unzipped" -type f -iname "certstream*.7z" -exec rm {} \; 2>/dev/null
    #Cat {} +
    #find "/tmp/Unzipped" -type f -iname "certstream_domains.txt" -exec cat {} \; 2>/dev/null | sort -u -o "/tmp/certstream_domains.txt" ; wc -l < "/tmp/certstream_domains.txt"
    # Concatenate only the files that `file` reports as text/plain, de-duplicated
    find "/tmp/Unzipped" -type f -iname "certstream_domains.txt" -exec bash -c 'file -b --mime-type "$1" | grep -q "text/plain" && cat "$1"' bash {} \; 2>/dev/null | sort -u -o "/tmp/certstream_domains.txt" ; wc -l < "/tmp/certstream_domains.txt"
    echo -e "\n[+] Total Size (certstream_domains_latest.txt): $(du -h /tmp/certstream_domains.txt | awk '{print $1}')\n"
  continue-on-error: true
- name: Parse Old .7z
  run: |
    # Presets
    set -x ; set +e
    #chdir to /tmp
    mkdir -p "/tmp/Unzipped_7days" ; cd "/tmp/Unzipped_7days"
    #Copy .7z (variable quoted against word splitting; the glob stays outside the quotes)
    cp "$GITHUB_WORKSPACE"/main/Raw/Archive/*.7z /tmp/Unzipped_7days
    # Extract every archive into a sibling directory named after it.
    # The path is handed to sh as "$1" instead of being spliced into the
    # script text, so unusual file names cannot break or inject into it.
    find "/tmp/Unzipped_7days" -iname "certstream*.7z" -exec sh -c '7z x "$1" -o"$(dirname "$1")/$(basename "$1" .7z)"' sh {} \;
    #Del .7z
    find "/tmp/Unzipped_7days" -type f -iname "certstream*.7z" -exec rm {} \; 2>/dev/null
    #Cat {} +
    #find "/tmp/Unzipped_7days" -type f -iname "certstream_domains.txt" -exec cat {} \; 2>/dev/null | sort -u -o "/tmp/certstream_domains_7days.txt" ; wc -l < "/tmp/certstream_domains_7days.txt"
    # Concatenate only the files that `file` reports as text/plain, de-duplicated
    find "/tmp/Unzipped_7days" -type f -iname "certstream_domains.txt" -exec bash -c 'file -b --mime-type "$1" | grep -q "text/plain" && cat "$1"' bash {} \; 2>/dev/null | sort -u -o "/tmp/certstream_domains_7days.txt" ; wc -l < "/tmp/certstream_domains_7days.txt"
    SSL_7DAYS_TOTAL="$(wc -l < /tmp/certstream_domains_7days.txt)"
    export SSL_7DAYS_TOTAL
    # `export` is scoped to this step's shell. The count is read later via
    # the `env` context, which is only populated through the GITHUB_ENV file.
    echo "SSL_7DAYS_TOTAL=$SSL_7DAYS_TOTAL" >> "$GITHUB_ENV"
    echo -e "\n[+] Total Subs (certstream_domains_7days.txt): $SSL_7DAYS_TOTAL"
    echo -e "[+] Total Size (certstream_domains_7days.txt): $(du -h /tmp/certstream_domains_7days.txt | awk '{print $1}')\n"
  continue-on-error: true
- name: Parse External Sources (NRD)
  run: |
    # Presets
    set -x ; set +e
    #Get Base URL
    BASE_URL="$(curl -qfsSL "" -H "User-Agent: $USER_AGENT" | grep -oE 'https://www\.cubdomain\.com/domains-registered-by-date/[0-9]{4}-[0-9]{2}-[0-9]{2}/[0-9]+' | sort -u | sort -V | tail -n 1 | sed 's/\/[^/]*$//')" && export BASE_URL="$BASE_URL" && echo -e "\n[+] Base URL: $BASE_URL"
    #Get latest HTML
    # The two fetches are grouped so the redirect captures whichever one
    # succeeds. Ungrouped, `>` binds only to the fallback curl and a
    # successful first fetch would go to stdout instead of the file.
    { curl -qfsSL "$(date +'%Y-%m-%d')/1" -H "User-Agent: $USER_AGENT" || curl -qfsSL "$(date -d 'yesterday' +'%Y-%m-%d')/1" -H "User-Agent: $USER_AGENT" ; } > "/tmp/cubdomain_daily.html"
    curl -qfsSL "$BASE_URL/1" -H "User-Agent: $USER_AGENT" >> "/tmp/cubdomain_daily.html"
    #Get Number of Pages
    PAGES_NUM="$(grep -oE 'https://www\.cubdomain\.com/domains-registered-by-date/[0-9]{4}-[0-9]{2}-[0-9]{2}/[0-9]+' "/tmp/cubdomain_daily.html" | awk -F '/' '{print $NF}' | sort -u | sort -V | tail -n 1)" && export PAGES_NUM="$PAGES_NUM" && echo -e "\n[+] Total Pages: $PAGES_NUM"
    # Default to 0 pages when nothing was parsed; an empty value would make
    # the arithmetic loop header a syntax error.
    for ((i=1; i<=${PAGES_NUM:-0}; i++)); do
      #echo -e "\n[+] Fetching Domains from : $BASE_URL/$i"
      curl -qfsSL "$BASE_URL/$i" -H "User-Agent: $USER_AGENT" | grep -o '<a href=https://www\.cubdomain\.com/site/[^>]*>[^<]*' | sed 's/.*>//'
    done > "/tmp/cubdomain_daily.txt"
    echo -e "\n[+] Parsing Domains"
    # Drop blank lines, strip all whitespace, de-duplicate in place
    sed '/^\s*$/d' "/tmp/cubdomain_daily.txt" | sed 's/[[:space:]]//g' | sort -u -o "/tmp/cubdomain_daily.txt"
    echo -e "[+] Total Domains: $(wc -l < /tmp/cubdomain_daily.txt)"
    echo -e "[+] Total Size: $(du -sh /tmp/cubdomain_daily.txt | awk '{print $1}')"
    # Merge into the daily certstream list and the daily NRD list
    cat "/tmp/cubdomain_daily.txt" | anew-rs -q "/tmp/certstream_domains.txt"
    cat "/tmp/cubdomain_daily.txt" | anew-rs -q "/tmp/nrd_latest.txt"
    # Weekly list (source URL elided in this view)
    curl -qfsSL "" | sed '/^\s*$/d' | sed 's/[[:space:]]//g' | sort -u -o "/tmp/shrestha_weekly.txt"
    cat "/tmp/shrestha_weekly.txt" | anew-rs -q "/tmp/certstream_domains_7days.txt"
    cat "/tmp/shrestha_weekly.txt" | anew-rs -q "/tmp/nrd_weekly.txt"
    # (
    #  pushd "$(mktemp -d)" > /dev/null 2>&1
    #  #Fetch
    #  curl -qfsSL "" | DAY_RANGE="2" bash
    #  #cat
    #  find . -type f -name '*.txt' -exec cat {} \; | sed '/^\s*$/d' | sed 's/[[:space:]]//g' | sort -u -o "/tmp/whoisds_daily.txt"
    #  popd > /dev/null 2>&1
    #  #anew
    #  cat "/tmp/whoisds_daily.txt" | anew-rs -q "/tmp/certstream_domains.txt"
    #  cat "/tmp/whoisds_daily.txt" | anew-rs -q "/tmp/nrd_latest.txt"
    # )
    # Daily lists shipped as .gz files (source URLs elided in this view)
    pushd "$(mktemp -d)" > /dev/null 2>&1
    curl -qfsSLJO ""
    curl -qfsSLJO ""
    find . -type f -name "*.gz" -exec gzip -d {} \; && find . -type f -name "*.gz" -exec rm {} \;
    find . -type f -exec cat {} \; | sed '/^\s*$/d' | sed 's/[[:space:]]//g' | sort -u -o "/tmp/maaaaz_daily.txt"
    cat "/tmp/maaaaz_daily.txt" | anew-rs -q "/tmp/certstream_domains.txt"
    cat "/tmp/maaaaz_daily.txt" | anew-rs -q "/tmp/nrd_latest.txt"
    popd > /dev/null 2>&1
    # Weekly list in `||domain^` filter syntax; keep only the domain part
    curl -qfsSL "" | grep -oE '\|\|(.*?)\^' | sed 's/||\(.*\)\^/\1/' | sed '/^\s*$/d' | sed 's/[[:space:]]//g' | sort -u -o "/tmp/systemjargon_weekly.txt"
    cat "/tmp/systemjargon_weekly.txt" | anew-rs -q "/tmp/certstream_domains_7days.txt"
    cat "/tmp/systemjargon_weekly.txt" | anew-rs -q "/tmp/nrd_weekly.txt"
  continue-on-error: true
- name: 🖳🇨🇭 Get Runners Metadata 🔒🔑
  continue-on-error: true
  run: |
    # Presets (tracing is off in this step; a secret is written to disk below)
    set +x
    set +e
    # Wrap the concatenated *meta.txt files in a console code fence
    echo '```console' > "$GITHUB_WORKSPACE/main/"
    find "$GITHUB_WORKSPACE/main/.github/SERVERS" -type f -name '*meta.txt' -exec cat {} \; |
      tee -a "$GITHUB_WORKSPACE/main/"
    echo '```' >> "$GITHUB_WORKSPACE/main/"
    #Ping Runners
    echo '${{ secrets.SF_SSH_HOSTS }}' > "/tmp/"
    dos2unix --quiet "/tmp/"
    chmod +xwr "/tmp/"
    # Run the script in a detached tmux session and wait until it exits
    tmux new-session -s "segfault" -d "bash /tmp/"
    sleep 10
    while tmux has-session -t "segfault"; do
      sleep 1
    done
    # Show the log surrounded by blank lines
    echo -e "\n\n"
    cat "/tmp/sf.log"
    echo -e "\n\n"
# Writes a Markdown stats report: a 24-hour section followed by a 7-day
# section, each with a heading line and a fenced console block.
# NOTE(review): the output file names after "$GITHUB_WORKSPACE/main/" are not
# visible in this view -- confirm which file(s) each redirect targets.
- name: Update
run: |
# Presets
set -x ; set +e
#cat INFO
# The first write truncates (>), every later write appends (>>)
echo -e "\n" > "$GITHUB_WORKSPACE/main/"
#Last 24 Hr
echo '---' >> "$GITHUB_WORKSPACE/main/"
echo -e "- #### [🖨️ **Stats** \`24Hr\`⏲️ ➼ $(date +'%Y_%m_%d')](" >> "$GITHUB_WORKSPACE/main/"
echo '```console' >> "$GITHUB_WORKSPACE/main/"
echo -e "" >> "$GITHUB_WORKSPACE/main/"
echo -e "\n--> 🌐 Total" >> "$GITHUB_WORKSPACE/main/"
# Line count of the merged daily list
echo -e "[+] New/ReNewed SSL Certs (ALL): +$(wc -l < /tmp/certstream_domains.txt)" >> "$GITHUB_WORKSPACE/main/"
echo -e "[+] View/Download:\n" >> "$GITHUB_WORKSPACE/main/"
echo '```' >> "$GITHUB_WORKSPACE/main/"
echo -e "" >> "$GITHUB_WORKSPACE/main/"
#Last 7 Days
echo '---' >> "$GITHUB_WORKSPACE/main/"
echo -e "- #### [🖨️ **Stats** \`7Days\`⏲️ ➼ $(date +'%Y_%m_%d') <--> $(date -d "7 days ago" +'%Y_%m_%d')](" >> "$GITHUB_WORKSPACE/main/"
echo '```console' >> "$GITHUB_WORKSPACE/main/"
echo -e "" >> "$GITHUB_WORKSPACE/main/"
echo -e "\n--> 🌐 Total" >> "$GITHUB_WORKSPACE/main/"
# NOTE(review): this expands from the workflow `env` context before the
# script runs. A plain shell `export` in an earlier step does not reach it;
# confirm SSL_7DAYS_TOTAL is written to $GITHUB_ENV, otherwise this is empty.
echo -e "[+] New/ReNewed SSL Certs (ALL): +${{ env.SSL_7DAYS_TOTAL }}" >> "$GITHUB_WORKSPACE/main/"
echo -e "[+] View/Download:\n" >> "$GITHUB_WORKSPACE/main/"
echo '```' >> "$GITHUB_WORKSPACE/main/"
echo -e "" >> "$GITHUB_WORKSPACE/main/"
echo '---' >> "$GITHUB_WORKSPACE/main/"
echo -e "" >> "$GITHUB_WORKSPACE/main/"
#cat INFO
continue-on-error: true
- name: Get DateTime
  run: |
    # Date Time (Asia/Kathmandu), e.g. "2024-01-31 (05:30:00 AM)"
    NEPALI_TIME="$(TZ='Asia/Kathmandu' date +'%Y-%m-%d (%I:%M:%S %p)')"
    # A shell variable ends with this step. The commit step reads
    # `env.NEPALI_TIME`, so the value is published through GITHUB_ENV.
    echo "NEPALI_TIME=$NEPALI_TIME" >> "$GITHUB_ENV"
- name: Git Pull
  continue-on-error: true
  run: |
    # Pull the main branch from origin into the checkout; git only runs
    # if the cd succeeds
    cd "$GITHUB_WORKSPACE/main" &&
      git pull origin main
# Commit and push whatever changed inside the ./main checkout.
- uses: stefanzweifel/git-auto-commit-action@v5
  # Action inputs have to be nested under `with:`; placed directly on the
  # step they are not valid step keys.
  with:
    repository: ./main
    commit_user_name: Azathothas # defaults to "github-actions[bot]"
    commit_user_email: # defaults to "41898282+github-actions[bot]"
    commit_message: "✅ ⚓ Filter | Parsed 🔰 CertStream Data 🌊 <-- ${{ env.NEPALI_TIME }} ⌚"
    #push_options: '--force'
- name: rClone Upload to R2 ("")
  run: |
    # Presets (tracing stays off: a secret is written to disk below)
    set +x ; set +e
    #touch "$HOME/.rclone.conf"
    echo "${{ secrets.RCLONE_CF_R2_PUB }}" > "$HOME/.rclone.conf"
    # Flags shared by every rclone call in this step, defined once so the
    # five invocations cannot drift apart.
    RCLONE_OPTS=(
      --user-agent="$USER_AGENT"
      --buffer-size="100M"
      --s3-upload-concurrency="500"
      --s3-chunk-size="100M"
      --multi-thread-streams="500"
      --checkers="2000"
      --transfers="1000"
      --retries="10"
      --check-first
      --checksum
      --copy-links
      --fast-list
      --progress
    )
    # Today's and yesterday's UTC date stamps, one per line
    R2_ARCHIVE="$(date --utc +'%Y_%m_%d')" && export R2_ARCHIVE="${R2_ARCHIVE}" ; echo "R2_ARCHIVE: ${R2_ARCHIVE}" && echo "${R2_ARCHIVE}" > "/tmp/R2_ARCHIVE.txt"
    R2_ARCHIVE_P="$(date --utc -d "$(date --utc +'%Y-%m-%d') - 1 day" +'%Y_%m_%d')" && export R2_ARCHIVE_P="${R2_ARCHIVE_P}" ; echo "R2_ARCHIVE_P: ${R2_ARCHIVE_P}" && echo "${R2_ARCHIVE_P}" >> "/tmp/R2_ARCHIVE.txt"
    ##Fetch Today's Archive
    rm -rf "/tmp/R2ARCHIVE" ; mkdir -p "/tmp/R2ARCHIVE" ; cd "/tmp/R2ARCHIVE"
    readarray -t dates < "/tmp/R2_ARCHIVE.txt"
    # For each date: collect that day's archives, merge their domain lists
    # into one de-duplicated file and re-compress it as <date>.7z
    for date in "${dates[@]}"; do
      echo -e "\n[+] Processing $date\n"
      find "$GITHUB_WORKSPACE/main/Raw/" -iname "*$date*.7z" -exec cp {} "/tmp/R2ARCHIVE" \; 2>/dev/null
      # The path is handed to sh as "$1" rather than spliced into the script text
      cd "/tmp/R2ARCHIVE" ; find "/tmp/R2ARCHIVE" -iname "*$date*.7z" -exec sh -c '7z x "$1" -o"$(dirname "$1")/$(basename "$1" .7z)"' sh {} \; 2>/dev/null
      find "/tmp/R2ARCHIVE" -iname "*$date*.7z" -exec rm -rf {} \; 2>/dev/null
      find "/tmp/R2ARCHIVE" -type f -iname "certstream_domains.txt" -exec bash -c 'file -b --mime-type "$1" | grep -q "text/plain" && cat "$1"' bash {} \; 2>/dev/null | sort -u -o "/tmp/$date.txt" ; wc -l "/tmp/$date.txt"
      find "/tmp/R2ARCHIVE" -type d -iname "*$date*" -exec rm -rf {} \; 2>/dev/null
      7z a -t7z -mx="9" -mmt="$(($(nproc)+1))" "/tmp/$date.7z" "/tmp/$date.txt"
      cp "/tmp/$date.7z" "/tmp/R2ARCHIVE/$date.7z" ; du -sh "/tmp/R2ARCHIVE/$date.7z"
      echo -e "\n[+] Generated (/tmp/R2ARCHIVE/$date.7z) [$date]\n"
    # The loop was never closed, which is a shell syntax error for the
    # whole script; it ends here, before the single sync of the results.
    done
    cd "/tmp/R2ARCHIVE" && rclone sync "./" "r2:/pub/datasets/certstream/Temp/" "${RCLONE_OPTS[@]}"
    echo -e "\n[+] Uploading Results to R2 (rclone)\n"
    sort -u "/tmp/certstream_domains.txt" -o "/tmp/certstream_domains.txt"
    rclone copyto "/tmp/certstream_domains.txt" "r2:/pub/datasets/certstream/all_latest.txt" "${RCLONE_OPTS[@]}"
    sort -u "/tmp/certstream_domains_7days.txt" -o "/tmp/certstream_domains_7days.txt"
    rclone copyto "/tmp/certstream_domains_7days.txt" "r2:/pub/datasets/certstream/all_weekly.txt" "${RCLONE_OPTS[@]}"
    sort -u "/tmp/nrd_latest.txt" -o "/tmp/nrd_latest.txt"
    rclone copyto "/tmp/nrd_latest.txt" "r2:/pub/datasets/certstream/nrd_latest.txt" "${RCLONE_OPTS[@]}"
    sort -u "/tmp/nrd_weekly.txt" -o "/tmp/nrd_weekly.txt"
    rclone copyto "/tmp/nrd_weekly.txt" "r2:/pub/datasets/certstream/nrd_weekly.txt" "${RCLONE_OPTS[@]}"
  continue-on-error: true