summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author lassulus <lassulus@lassul.us> 2021-01-07 23:15:06 +0100
committer lassulus <lassulus@lassul.us> 2021-01-07 23:15:06 +0100
commit faefe4c3c83ab24fcdd86cdf11f634f7390de22c (patch)
tree 75b2820c9d742b48addec4dc45e2644a6cbcacab
parent 341a751ea26b33ac6c8b7f661cb9d2bf8e6f21d3 (diff)
parent 2aab7aea07d469f60fdfb662b75f707dc70c86a8 (diff)
Merge remote-tracking branch 'ni/master'
-rw-r--r-- krebs/5pkgs/simple/urix.nix 15
-rw-r--r-- lib/default.nix 1
-rw-r--r-- lib/uri.nix 77
3 files changed, 93 insertions, 0 deletions
diff --git a/krebs/5pkgs/simple/urix.nix b/krebs/5pkgs/simple/urix.nix
new file mode 100644
index 000000000..c0db8c975
--- /dev/null
+++ b/krebs/5pkgs/simple/urix.nix
@@ -0,0 +1,15 @@
+# urix - URI eXtractor
+# Reads standard input and writes every URI found in it to standard output.
+# usage: urix < SOMEFILE
+{ pkgs }:
+
+let
+  inherit (import <stockholm/lib>) uri;
+  # \b on both sides: a match has to begin and end at a word boundary.
+  pattern = "\\b${uri.posix-extended-regex}\\b";
+in
+pkgs.execBin "urix" {
+  filename = "${pkgs.gnugrep}/bin/grep";
+  # -E: extended regular expressions; -o: print only the matching parts.
+  argv = [ "urix" "-Eo" pattern ];
+}
diff --git a/lib/default.nix b/lib/default.nix
index be9f60f3b..2efeec078 100644
--- a/lib/default.nix
+++ b/lib/default.nix
@@ -12,6 +12,7 @@ let
encodeName = replaceChars ["/"] ["\\x2f"];
};
types = nixpkgs-lib.types // import ./types.nix { inherit lib; };
+ uri = import ./uri.nix { inherit lib; };
xml = import ./xml.nix { inherit lib; };
eq = x: y: x == y;
diff --git a/lib/uri.nix b/lib/uri.nix
new file mode 100644
index 000000000..72ad390b7
--- /dev/null
+++ b/lib/uri.nix
@@ -0,0 +1,77 @@
+{ lib }:
+with lib;
+with builtins;
+rec {
+ # URI regular expression per RFC 3986, laid out with spaces and "#" comments for readability.
+ # Source: http://jmrware.com/articles/2009/uri_regexp/URI_regex.html#uri-40
+ native-regex = ''
+ # RFC-3986 URI component: URI
+ [A-Za-z][A-Za-z0-9+\-.]* : # scheme ":"
+ (?: // # hier-part
+ (?: (?:[A-Za-z0-9\-._~!$&'()*+,;=:]|%[0-9A-Fa-f]{2})* @)?
+ (?:
+ \[
+ (?:
+ (?:
+ (?: (?:[0-9A-Fa-f]{1,4}:){6}
+ | :: (?:[0-9A-Fa-f]{1,4}:){5}
+ | (?: [0-9A-Fa-f]{1,4})? :: (?:[0-9A-Fa-f]{1,4}:){4}
+ | (?: (?:[0-9A-Fa-f]{1,4}:){0,1} [0-9A-Fa-f]{1,4})? :: (?:[0-9A-Fa-f]{1,4}:){3}
+ | (?: (?:[0-9A-Fa-f]{1,4}:){0,2} [0-9A-Fa-f]{1,4})? :: (?:[0-9A-Fa-f]{1,4}:){2}
+ | (?: (?:[0-9A-Fa-f]{1,4}:){0,3} [0-9A-Fa-f]{1,4})? :: [0-9A-Fa-f]{1,4}:
+ | (?: (?:[0-9A-Fa-f]{1,4}:){0,4} [0-9A-Fa-f]{1,4})? ::
+ ) (?:
+ [0-9A-Fa-f]{1,4} : [0-9A-Fa-f]{1,4}
+ | (?: (?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?) \.){3}
+ (?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)
+ )
+ | (?: (?:[0-9A-Fa-f]{1,4}:){0,5} [0-9A-Fa-f]{1,4})? :: [0-9A-Fa-f]{1,4}
+ | (?: (?:[0-9A-Fa-f]{1,4}:){0,6} [0-9A-Fa-f]{1,4})? ::
+ )
+ | [Vv][0-9A-Fa-f]+\.[A-Za-z0-9\-._~!$&'()*+,;=:]+
+ )
+ \]
+ | (?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}
+ (?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)
+ | (?:[A-Za-z0-9\-._~!$&'()*+,;=]|%[0-9A-Fa-f]{2})*
+ )
+ (?: : [0-9]* )?
+ (?:/ (?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})* )*
+ | /
+ (?: (?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})+
+ (?:/ (?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})* )*
+ )?
+ | (?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})+
+ (?:/ (?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})* )*
+ |
+ )
+ (?:\? (?:[A-Za-z0-9\-._~!$&'()*+,;=:@/?]|%[0-9A-Fa-f]{2})* )? # [ "?" query ]
+ (?:\# (?:[A-Za-z0-9\-._~!$&'()*+,;=:@/?]|%[0-9A-Fa-f]{2})* )? # [ "#" fragment ]
+ '';
+
+ # native-regex rewritten line by line into one compact POSIX-ERE-style string.
+ posix-extended-regex =
+   let
+     # Keep the text before the first `#` that is not written as `\#`.
+     removeComment = s:
+       elemAt (match "^((\\\\#|[^#])*)(#.*)?$" s) 0;
+
+     # `\#` only shields `#` from removeComment; a backslash before an ordinary
+     # character is undefined in POSIX ERE, so emit a bare `#` instead.
+     unescapeHash = replaceStrings ["\\#"] ["#"];
+
+     removeWhitespace = replaceStrings [" "] [""];
+
+     # Repeatedly turn `[..\-..]` into `[....-]`; ERE brackets have no escapes.
+     moveDashToEndOfCharacterClass = s:
+       let
+         result = match "(.*)\\\\-([^]]+)(].*)" s;
+         s' = elemAt result 0 + elemAt result 1 + "-" + elemAt result 2;
+       in
+         if result == null then s else moveDashToEndOfCharacterClass s';
+   in
+     # Run each step over every line (in list order), then join the lines.
+     concatStrings (foldl' (a: f: map f a) (splitString "\n" native-regex) [
+       removeComment unescapeHash moveDashToEndOfCharacterClass
+       (replaceStrings ["(?:"] ["("]) removeWhitespace ]);
+}