diff options
-rw-r--r-- | krebs/5pkgs/simple/urix.nix | 15 | ||||
-rw-r--r-- | lib/default.nix | 1 | ||||
-rw-r--r-- | lib/uri.nix | 77 |
3 files changed, 93 insertions, 0 deletions
diff --git a/krebs/5pkgs/simple/urix.nix b/krebs/5pkgs/simple/urix.nix
new file mode 100644
index 000000000..c0db8c975
--- /dev/null
+++ b/krebs/5pkgs/simple/urix.nix
@@ -0,0 +1,15 @@
+let lib = import <stockholm/lib>; in
+{ pkgs }:
+
+# urix - URI eXtractor
+# Extract all the URIs from standard input and write them to standard output!
+# usage: urix < SOMEFILE
+
+pkgs.execBin "urix" {  # execBin is defined elsewhere (not visible here); it is given a binary path and an argv list
+  filename = "${pkgs.gnugrep}/bin/grep";
+  argv = [
+    "urix"  # presumably argv[0], i.e. the name the process runs under -- confirm against execBin
+    "-Eo"  # grep -E: extended regular expressions; -o: print only the matched parts
+    "\\b${lib.uri.posix-extended-regex}\\b"  # Nix "\\b" is a literal \b, GNU grep's word-boundary anchor, on both sides of the URI pattern
+  ];
+}
diff --git a/lib/default.nix b/lib/default.nix
index be9f60f3b..2efeec078 100644
--- a/lib/default.nix
+++ b/lib/default.nix
@@ -12,6 +12,7 @@ let
       encodeName = replaceChars ["/"] ["\\x2f"];
     };
     types = nixpkgs-lib.types // import ./types.nix { inherit lib; };
+    uri = import ./uri.nix { inherit lib; };
     xml = import ./xml.nix { inherit lib; };
 
     eq = x: y: x == y;
diff --git a/lib/uri.nix b/lib/uri.nix
new file mode 100644
index 000000000..72ad390b7
--- /dev/null
+++ b/lib/uri.nix
@@ -0,0 +1,77 @@
+{ lib }:
+with lib;
+with builtins;
+rec {
+  # Regular expression to match URIs per RFC3986, one fragment per line; posix-extended-regex below strips its spaces and trailing "# ..." comments.
+  # From: http://jmrware.com/articles/2009/uri_regexp/URI_regex.html#uri-40
+  native-regex = ''
+    # RFC-3986 URI component: URI
+    [A-Za-z][A-Za-z0-9+\-.]* : # scheme ":"
+    (?: // # hier-part
+      (?: (?:[A-Za-z0-9\-._~!$&'()*+,;=:]|%[0-9A-Fa-f]{2})* @)?
+      (?:
+        \[
+        (?:
+          (?:
+            (?: (?:[0-9A-Fa-f]{1,4}:){6}
+            | :: (?:[0-9A-Fa-f]{1,4}:){5}
+            | (?: [0-9A-Fa-f]{1,4})? :: (?:[0-9A-Fa-f]{1,4}:){4}
+            | (?: (?:[0-9A-Fa-f]{1,4}:){0,1} [0-9A-Fa-f]{1,4})? :: (?:[0-9A-Fa-f]{1,4}:){3}
+            | (?: (?:[0-9A-Fa-f]{1,4}:){0,2} [0-9A-Fa-f]{1,4})? :: (?:[0-9A-Fa-f]{1,4}:){2}
+            | (?: (?:[0-9A-Fa-f]{1,4}:){0,3} [0-9A-Fa-f]{1,4})? :: [0-9A-Fa-f]{1,4}:
+            | (?: (?:[0-9A-Fa-f]{1,4}:){0,4} [0-9A-Fa-f]{1,4})? ::
+            ) (?:
+                [0-9A-Fa-f]{1,4} : [0-9A-Fa-f]{1,4}
+              | (?: (?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?) \.){3}
+                (?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)
+              )
+          | (?: (?:[0-9A-Fa-f]{1,4}:){0,5} [0-9A-Fa-f]{1,4})? :: [0-9A-Fa-f]{1,4}
+          | (?: (?:[0-9A-Fa-f]{1,4}:){0,6} [0-9A-Fa-f]{1,4})? ::
+          )
+        | [Vv][0-9A-Fa-f]+\.[A-Za-z0-9\-._~!$&'()*+,;=:]+
+        )
+        \]
+      | (?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}
+        (?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)
+      | (?:[A-Za-z0-9\-._~!$&'()*+,;=]|%[0-9A-Fa-f]{2})*
+      )
+      (?: : [0-9]* )?
+      (?:/ (?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})* )*
+    | /
+      (?: (?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})+
+        (?:/ (?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})* )*
+      )?
+    | (?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})+
+        (?:/ (?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})* )*
+    |
+    )
+    (?:\? (?:[A-Za-z0-9\-._~!$&'()*+,;=:@/?]|%[0-9A-Fa-f]{2})* )? # [ "?" query ]
+    (?:\# (?:[A-Za-z0-9\-._~!$&'()*+,;=:@/?]|%[0-9A-Fa-f]{2})* )? # [ "#" fragment ]
+  '';
+
+  posix-extended-regex =  # native-regex converted line by line and joined into one string, for POSIX ERE consumers such as grep -E
+    let
+      removeComment = s:  # keep the text before a trailing "# ..." comment; the pattern lets an escaped "\#" stay in the kept part
+        elemAt (match "^((\\\\#|[^#])*)(#.*)?$" s) 0;
+
+      removeWhitespace =  # delete every " " (space) character
+        replaceStrings [" "] [""];
+
+      moveDashToEndOfCharacterClass = s:  # turn a "\-" inside [...] into a plain "-" placed right before the closing "]"
+        let
+          result = match "(.*)\\\\-([^]]+)(].*)" s;  # null when s holds no "\-" followed by further characters and a "]"
+          s' = elemAt result 0 + elemAt result 1 + "-" + elemAt result 2;
+        in
+        if result != null then
+          moveDashToEndOfCharacterClass s'  # recurse until no "\-" is left on the line
+        else
+          s;
+    in
+    concatStrings  # join the converted lines with no separator
+      (foldl' (a: f: map f a) (splitString "\n" native-regex) [  # run each step below, in list order, over every line
+        removeComment
+        moveDashToEndOfCharacterClass
+        (replaceStrings ["(?:"] ["("])  # "(?:" non-capturing groups become plain "(" groups
+        removeWhitespace
+      ]);
+}
|