diff options
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/ruby-tools/.gitignore | 2 | ||||
| -rw-r--r-- | tools/ruby-tools/Gemfile | 4 | ||||
| -rw-r--r-- | tools/ruby-tools/Gemfile.lock | 17 | ||||
| -rwxr-xr-x | tools/ruby-tools/gen.sh | 4 | ||||
| -rwxr-xr-x | tools/ruby-tools/generate-surge-geosite.rb | 192 | 
5 files changed, 219 insertions, 0 deletions
| diff --git a/tools/ruby-tools/.gitignore b/tools/ruby-tools/.gitignore new file mode 100644 index 0000000..91459db --- /dev/null +++ b/tools/ruby-tools/.gitignore @@ -0,0 +1,2 @@ +ChinaRuleSet.txt +GlobalRuleSet.txt diff --git a/tools/ruby-tools/Gemfile b/tools/ruby-tools/Gemfile new file mode 100644 index 0000000..f69d323 --- /dev/null +++ b/tools/ruby-tools/Gemfile @@ -0,0 +1,4 @@ +source "https://rubygems.org" + +gem "sorbet", group: :development +gem "sorbet-runtime" diff --git a/tools/ruby-tools/Gemfile.lock b/tools/ruby-tools/Gemfile.lock new file mode 100644 index 0000000..94b729b --- /dev/null +++ b/tools/ruby-tools/Gemfile.lock @@ -0,0 +1,17 @@ +GEM +  remote: https://rubygems.org/ +  specs: +    sorbet (0.5.11305) +      sorbet-static (= 0.5.11305) +    sorbet-runtime (0.5.11305) +    sorbet-static (0.5.11305-universal-darwin) + +PLATFORMS +  x86_64-darwin-23 + +DEPENDENCIES +  sorbet +  sorbet-runtime + +BUNDLED WITH +   2.5.7 diff --git a/tools/ruby-tools/gen.sh b/tools/ruby-tools/gen.sh new file mode 100755 index 0000000..6c8438e --- /dev/null +++ b/tools/ruby-tools/gen.sh @@ -0,0 +1,4 @@ +#! 
#!/usr/bin/env ruby
# typed: true
# frozen_string_literal: true

# generate-surge-geosite.rb
#
# Downloads the master archive of v2fly/domain-list-community, walks the data
# files listed in START_FILES (following their `include:` directives) and
# prints one `TYPE,value` rule per line on stdout.
#
# Usage (this is what the accompanying gen.sh runs):
#   ./generate-surge-geosite.rb china  > ChinaRuleSet.txt
#   ./generate-surge-geosite.rb global > GlobalRuleSet.txt
#
# Modes:
#   global - print the rules that carry no attribute at all
#   china  - print the rules that carry the "cn" attribute
#
# Requires the `curl` and `unzip` executables on PATH.

require "sorbet-runtime"
require "set"
require "tmpdir"
# FileUtils is needed for the temp-dir cleanup; require it explicitly instead
# of relying on tmpdir loading it as a side effect.
require "fileutils"

extend T::Sig

# Data files (names inside the archive's data/ directory) used as entry points.
START_FILES =
  T.let(
    %w[
      github
      google
      youtube
      twitter
      facebook
      discord
      reddit
      wikimedia
      stackexchange
      libgen
      python
      ruby
      creativecommons
      sci-hub
      v2ray
      imgur
      npmjs
      onedrive
      matrix
    ].freeze,
    T::Array[String]
  )

# Archive that contains the domain lists.
ARCHIVE_URL =
  "https://github.com/v2fly/domain-list-community/archive/refs/heads/master.zip"

# Maps a rule prefix found in the data files to the rule type that is printed.
# A rule without any of these prefixes is treated as DOMAIN-SUFFIX.
RULE_TYPES =
  T.let(
    {
      "domain:" => "DOMAIN-SUFFIX",
      "full:" => "DOMAIN",
      "keyword:" => "DOMAIN-KEYWORD",
      "regexp:" => "URL-REGEX"
    }.freeze,
    T::Hash[String, String]
  )

# Downloads and unpacks the archive into a fresh temporary directory.
#
# Returns a two-element array: [data_dir, temp_dir]. The caller owns temp_dir
# and must remove it when done.
#
# Raises RuntimeError when the download or extraction fails or when the
# expected data directory is missing. In that case the temporary directory is
# removed before the error propagates.
sig { returns(T::Array[String]) }
def download_data
  temp_dir = Dir.mktmpdir

  begin
    zip_file = File.join(temp_dir, "domain-list-community-master.zip")

    # Argument-list form of system: no shell is involved, so paths and the URL
    # need no quoting, and the exit status is checked explicitly.
    unless system("curl", "-sfL", ARCHIVE_URL, "-o", zip_file)
      raise "failed to download #{ARCHIVE_URL}"
    end

    # stdout of this script is the generated rule set, so unzip must not write
    # to it.
    unless system("unzip", "-q", zip_file, "-d", temp_dir, out: File::NULL)
      raise "failed to unpack #{zip_file}"
    end

    data_dir = File.join(temp_dir, "domain-list-community-master", "data")
    raise "data dir not found" unless Dir.exist?(data_dir)

    [data_dir, temp_dir]
  rescue StandardError
    # The caller never learns about temp_dir on failure, so clean up here.
    FileUtils.remove_entry(temp_dir)
    raise
  end
end

# A single rule: its output type (e.g. "DOMAIN-SUFFIX"), its value and the
# attributes (without the leading "@") attached to it in the data file.
class Entry
  extend T::Sig

  sig { params(type: String, value: String, attributes: T::Array[String]).void }
  def initialize(type, value, attributes)
    @type = type
    @value = value
    @attributes = attributes
  end

  attr_reader :type, :value, :attributes

  # Output form of the rule: "TYPE,value".
  sig { returns(String) }
  def to_s
    "#{type},#{value}"
  end
end

# Parses one line of a data file. The argument is not modified.
#
# Returns nil if the line is a comment or empty.
# Returns a String (the referenced file name) if the line is an include.
# Returns an Entry if the line is a rule.
#
# Raises RuntimeError if a field after the rule does not start with "@".
sig { params(line: String).returns(T.nilable(T.any(Entry, String))) }
def handle_line(line)
  stripped = line.strip
  return if stripped.empty? || stripped.start_with?("#")

  # Drop a trailing "# comment", then split on runs of whitespace.
  fields = T.must(stripped.split("#").first).split
  rule = T.must(fields.first)

  attributes =
    fields.drop(1).map do |attribute|
      raise "Invalid attribute: #{attribute}" unless attribute.start_with?("@")

      T.must(attribute[1..])
    end

  return rule.delete_prefix("include:") if rule.start_with?("include:")

  match = RULE_TYPES.find { |prefix, _type| rule.start_with?(prefix) }
  return Entry.new("DOMAIN-SUFFIX", rule, attributes) unless match

  prefix, type = match
  Entry.new(type, rule.delete_prefix(prefix), attributes)
end

# Reads data_dir/filename and appends every rule it contains to entries,
# recursing into included files. already_handled_files records the names that
# were visited, so each file is read at most once and include cycles terminate.
sig do
  params(
    filename: String,
    data_dir: String,
    already_handled_files: T::Set[String],
    entries: T::Array[Entry]
  ).void
end
def handle_file(filename, data_dir, already_handled_files, entries)
  # Set#add? returns nil when the element was already present.
  return unless already_handled_files.add?(filename)

  file_path = File.join(data_dir, filename)
  File.foreach(file_path, encoding: "UTF-8") do |line|
    parsed = handle_line(line)
    case parsed
    when Entry
      entries << parsed
    when String
      handle_file(parsed, data_dir, already_handled_files, entries)
    end
  end
end

# Collects the rules of all start_files (and everything they include), in the
# order in which they are encountered.
sig do
  params(data_dir: String, start_files: T::Array[String]).returns(
    T::Array[Entry]
  )
end
def handle_data(data_dir, start_files)
  already_handled_files = T.let(Set.new, T::Set[String])
  result = T.let([], T::Array[Entry])
  start_files.each do |filename|
    handle_file(filename, data_dir, already_handled_files, result)
  end
  result
end

# Returns the entries that have no attribute at all.
sig { params(entries: T::Array[Entry]).returns(T::Array[Entry]) }
def get_entries_with_no_attribute(entries)
  entries.select { |entry| entry.attributes.empty? }
end

# Returns the entries whose attribute list contains `attribute`.
sig do
  params(entries: T::Array[Entry], attribute: String).returns(T::Array[Entry])
end
def get_entries_with_attribute(entries, attribute)
  entries.select { |entry| entry.attributes.include?(attribute) }
end

# Prints each entry on its own line on stdout.
sig { params(entries: T::Array[Entry]).void }
def print_entries(entries)
  entries.each { |entry| puts entry.to_s }
end

raise "Usage: generate-surge-geosite.rb <china|global>" unless ARGV.length == 1

mode = T.must(ARGV.first)
raise "Invalid mode: #{mode}" unless %w[china global].include?(mode)

data_dir, temp_dir = download_data
begin
  entries = handle_data(T.must(data_dir), START_FILES)
  selected =
    if mode == "global"
      get_entries_with_no_attribute(entries)
    else
      get_entries_with_attribute(entries, "cn")
    end
  print_entries(selected)
ensure
  # Remove the download even when parsing or printing fails.
  FileUtils.remove_entry(T.must(temp_dir))
end
