Skip to content

Commit

Permalink
Initial commit.
Browse files Browse the repository at this point in the history
  • Loading branch information
houseofcat committed Nov 23, 2023
1 parent 2cb1fe0 commit 0b61bf9
Show file tree
Hide file tree
Showing 7 changed files with 290 additions and 3 deletions.
94 changes: 94 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
[*.{cs,vb}]

# IDE0028: Simplify collection initialization
dotnet_diagnostic.IDE0028.severity = silent
dotnet_style_operator_placement_when_wrapping = beginning_of_line
tab_width = 4
indent_size = 4
end_of_line = crlf
dotnet_style_coalesce_expression = true:suggestion
dotnet_style_null_propagation = true:suggestion
dotnet_style_prefer_is_null_check_over_reference_equality_method = true:suggestion
dotnet_style_prefer_auto_properties = true:silent
dotnet_style_object_initializer = true:suggestion
dotnet_style_prefer_collection_expression = true:suggestion
dotnet_style_collection_initializer = true:suggestion
dotnet_style_prefer_simplified_boolean_expressions = true:suggestion
dotnet_style_prefer_conditional_expression_over_assignment = true:silent
dotnet_style_prefer_conditional_expression_over_return = true:silent
dotnet_style_explicit_tuple_names = true:suggestion

[*.cs]
csharp_indent_labels = one_less_than_current
csharp_using_directive_placement = outside_namespace:silent
csharp_prefer_simple_using_statement = true:suggestion
csharp_prefer_braces = true:silent
csharp_style_namespace_declarations = file_scoped:suggestion
csharp_style_prefer_method_group_conversion = true:silent
csharp_style_prefer_top_level_statements = true:silent
csharp_style_prefer_primary_constructors = true:suggestion
csharp_style_expression_bodied_methods = false:silent
csharp_style_expression_bodied_constructors = false:silent
csharp_style_expression_bodied_operators = false:silent
csharp_style_expression_bodied_properties = true:silent
csharp_style_expression_bodied_indexers = true:silent
csharp_style_expression_bodied_accessors = true:silent
csharp_style_expression_bodied_lambdas = true:silent
csharp_style_expression_bodied_local_functions = false:silent
csharp_space_around_binary_operators = before_and_after

# IDE0090: Use 'new(...)'
dotnet_diagnostic.IDE0090.severity = silent

# CA1854: Prefer the 'IDictionary.TryGetValue(TKey, out TValue)' method
dotnet_diagnostic.CA1854.severity = silent

[*.{cs,vb}]
#### Naming styles ####

# Naming rules

dotnet_naming_rule.interface_should_be_begins_with_i.severity = suggestion
dotnet_naming_rule.interface_should_be_begins_with_i.symbols = interface
dotnet_naming_rule.interface_should_be_begins_with_i.style = begins_with_i

dotnet_naming_rule.types_should_be_pascal_case.severity = suggestion
dotnet_naming_rule.types_should_be_pascal_case.symbols = types
dotnet_naming_rule.types_should_be_pascal_case.style = pascal_case

dotnet_naming_rule.non_field_members_should_be_pascal_case.severity = suggestion
dotnet_naming_rule.non_field_members_should_be_pascal_case.symbols = non_field_members
dotnet_naming_rule.non_field_members_should_be_pascal_case.style = pascal_case

# Symbol specifications

dotnet_naming_symbols.interface.applicable_kinds = interface
dotnet_naming_symbols.interface.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected
dotnet_naming_symbols.interface.required_modifiers =

dotnet_naming_symbols.types.applicable_kinds = class, struct, interface, enum
dotnet_naming_symbols.types.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected
dotnet_naming_symbols.types.required_modifiers =

dotnet_naming_symbols.non_field_members.applicable_kinds = property, event, method
dotnet_naming_symbols.non_field_members.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected
dotnet_naming_symbols.non_field_members.required_modifiers =

# Naming styles

dotnet_naming_style.begins_with_i.required_prefix = I
dotnet_naming_style.begins_with_i.required_suffix =
dotnet_naming_style.begins_with_i.word_separator =
dotnet_naming_style.begins_with_i.capitalization = pascal_case

dotnet_naming_style.pascal_case.required_prefix =
dotnet_naming_style.pascal_case.required_suffix =
dotnet_naming_style.pascal_case.word_separator =
dotnet_naming_style.pascal_case.capitalization = pascal_case

dotnet_naming_style.pascal_case.required_prefix =
dotnet_naming_style.pascal_case.required_suffix =
dotnet_naming_style.pascal_case.word_separator =
dotnet_naming_style.pascal_case.capitalization = pascal_case
dotnet_style_prefer_inferred_tuple_names = true:suggestion
dotnet_style_namespace_match_folder = true:suggestion
1 change: 1 addition & 0 deletions Mnemonic.sln
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{B09A4292-905
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{9D157855-7C1B-4057-AFD1-ABCDC8CF5AC8}"
ProjectSection(SolutionItems) = preProject
.editorconfig = .editorconfig
.gitignore = .gitignore
common.props = common.props
houseofcat.png = houseofcat.png
Expand Down
2 changes: 1 addition & 1 deletion common.props
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<PropertyGroup>

<!-- Shared Project Properties -->
<TargetFrameworks>net7.0;net8.0</TargetFrameworks>
<TargetFrameworks>net8.0</TargetFrameworks>
<LangVersion>latest</LangVersion>
<NoWarn>1591</NoWarn>
<IsWindows Condition="'$([System.Runtime.InteropServices.RuntimeInformation]::IsOSPlatform($([System.Runtime.InteropServices.OSPlatform]::Windows)))' == 'true'">true</IsWindows>
Expand Down
19 changes: 17 additions & 2 deletions examples/Mnemonic/Program.cs
Original file line number Diff line number Diff line change
@@ -1,2 +1,17 @@
// See https://aka.ms/new-console-template for more information
Console.WriteLine("Hello, World!");
using Mnemonic.AhoCorasick;
using System;

// Sample: replace one registered phrase in a sentence and print the before/after text.
Console.WriteLine("Mnemonic AhoCorasick StringReplace");

// Every pattern/replacement pair is registered before the failure links are built.
AhoCorasickStringReplace replacer = new();
replacer.AddPattern("apple is red.", "apple is yellow.");
replacer.AddPattern("apple is green.", "apple is blue.");

// Replace throws unless BuildFailureLinks has been invoked first.
replacer.BuildFailureLinks();

string input = "My apple is red.";
string output = replacer.Replace(input);

Console.WriteLine("Input: " + input);
Console.WriteLine("Output: " + output);
121 changes: 121 additions & 0 deletions src/Mnemonic.AhoCorasick/AhoCorasickStringReplace.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
using System;
using System.Collections.Generic;
using System.Text;

namespace Mnemonic.AhoCorasick;

/// <summary>
/// Replaces occurrences of a set of registered patterns in a single left-to-right pass
/// over the input, using a trie whose nodes carry failure links (Aho-Corasick).
/// </summary>
/// <remarks>
/// Usage: call <see cref="AddPattern"/> for every pattern, call <see cref="BuildFailureLinks"/>
/// once, then call <see cref="Replace"/> as often as needed.
/// Characters are compared by exact <see cref="char"/> value. The first pattern occurrence to
/// finish while scanning is replaced; if several patterns finish on the same character the
/// longest one is used. Matching then restarts after the replaced text, so matches never overlap.
/// </remarks>
public sealed class AhoCorasickStringReplace
{
    private static readonly string _mustAddFirstError = $"You can only invoke {nameof(AddPattern)} before invoking {nameof(BuildFailureLinks)}.";
    private static readonly string _mustBuildFirstError = $"You must invoke {nameof(BuildFailureLinks)} before attempting to invoke {nameof(Replace)}.";

    private readonly TrieNode _root = new TrieNode();

    // Number of characters on the path from the root to each node. Keyed by reference because
    // TrieNode is a record and its synthesized value equality is not an identity check.
    private readonly Dictionary<TrieNode, int> _depths = new Dictionary<TrieNode, int>(ReferenceEqualityComparer.Instance);

    // For every node flagged IsEndOfPattern: the node of the longest registered pattern that ends
    // there (the node itself, or a node reached over failure links). Filled by BuildFailureLinks.
    private readonly Dictionary<TrieNode, TrieNode> _longestMatch = new Dictionary<TrieNode, TrieNode>(ReferenceEqualityComparer.Instance);

    private bool _isBuilt;

    /// <summary>Creates an empty replacer with no patterns registered.</summary>
    public AhoCorasickStringReplace()
    {
        _depths[_root] = 0;
    }

    /// <summary>
    /// Registers <paramref name="pattern"/> so that <see cref="Replace"/> substitutes it with
    /// <paramref name="replacement"/>. Registering the same pattern again overwrites its replacement.
    /// </summary>
    /// <param name="pattern">The text to search for; must contain at least one character.</param>
    /// <param name="replacement">The text written in place of the pattern; <c>null</c> writes nothing.</param>
    /// <exception cref="InvalidOperationException"><see cref="BuildFailureLinks"/> has already been invoked.</exception>
    /// <exception cref="ArgumentException"><paramref name="pattern"/> is empty.</exception>
    public void AddPattern(ReadOnlySpan<char> pattern, string replacement)
    {
        if (_isBuilt)
        {
            throw new InvalidOperationException(_mustAddFirstError);
        }

        // An empty pattern would turn the root into a match and corrupt every replacement;
        // string.Replace rejects an empty search string in the same way.
        if (pattern.IsEmpty)
        {
            throw new ArgumentException("The pattern must contain at least one character.", nameof(pattern));
        }

        var node = _root;
        var depth = 0;

        foreach (var c in pattern)
        {
            depth++;

            if (!node.Children.TryGetValue(c, out var child))
            {
                child = new TrieNode { Key = c };
                node.Children.Add(c, child);
                _depths.Add(child, depth);
            }

            node = child;
        }

        node.IsEndOfPattern = true;
        node.Replacement = replacement;
    }

    /// <summary>
    /// Computes the failure link of every trie node and freezes the pattern set.
    /// Invocations after the first one do nothing.
    /// </summary>
    public void BuildFailureLinks()
    {
        if (_isBuilt)
        {
            return;
        }

        var queue = new Queue<TrieNode>();

        // First level: the only proper suffix of a one-character path is the empty path (root).
        foreach (var child in _root.Children.Values)
        {
            child.FailureLinkNode = _root;

            if (child.IsEndOfPattern)
            {
                _longestMatch[child] = child;
            }

            queue.Enqueue(child);
        }

        // Breadth-first, so every shorter path is fully linked before a longer one needs it.
        while (queue.Count > 0)
        {
            BuildFailureLink(queue);
        }

        _isBuilt = true;
    }

    /// <summary>
    /// Returns a copy of <paramref name="input"/> in which occurrences of the registered patterns
    /// are replaced. Text that is not part of a replaced occurrence is copied unchanged.
    /// </summary>
    /// <param name="input">The text to scan.</param>
    /// <returns>The rewritten text; an empty string when <paramref name="input"/> is empty.</returns>
    /// <exception cref="InvalidOperationException"><see cref="BuildFailureLinks"/> has not been invoked yet.</exception>
    public string Replace(ReadOnlySpan<char> input)
    {
        if (!_isBuilt)
        {
            throw new InvalidOperationException(_mustBuildFirstError);
        }

        if (input.IsEmpty)
        {
            return string.Empty;
        }

        var result = new StringBuilder(input.Length);
        var node = _root;

        // Count of already consumed characters that are not written to the result yet because
        // they may still turn out to be part of a match. Always equals the depth of `node`.
        var pending = 0;

        for (var i = 0; i < input.Length; i++)
        {
            var c = input[i];
            TrieNode next;

            // No transition for c: fall back to the longest suffix that is still a trie path and
            // write out the leading characters that can no longer belong to any match.
            while (!node.Children.TryGetValue(c, out next) && !ReferenceEquals(node, _root))
            {
                node = node.FailureLinkNode;
                var kept = _depths[node];
                result.Append(input.Slice(i - pending, pending - kept));
                pending = kept;
            }

            if (next is null)
            {
                // At the root and c starts no pattern: copy it through.
                result.Append(c);
                continue;
            }

            node = next;
            pending++;

            if (node.IsEndOfPattern)
            {
                // The match may be a suffix of the pending text; keep whatever precedes it.
                var match = _longestMatch[node];
                result.Append(input.Slice(i + 1 - pending, pending - _depths[match]));
                result.Append(match.Replacement);
                node = _root;
                pending = 0;
            }
        }

        // Input ended in the middle of a potential match: that text is ordinary output.
        result.Append(input.Slice(input.Length - pending, pending));

        return result.ToString();
    }

    /// <summary>
    /// Dequeues one node, assigns the failure link of each of its children, records which pattern
    /// (if any) ends at each child, and enqueues the children.
    /// </summary>
    private void BuildFailureLink(Queue<TrieNode> queue)
    {
        var parent = queue.Dequeue();

        foreach (var (key, child) in parent.Children)
        {
            // Walk the parent's failure chain until some node has a transition for this key.
            var fallback = parent.FailureLinkNode;
            TrieNode target = null;

            while (fallback is not null && !fallback.Children.TryGetValue(key, out target))
            {
                fallback = fallback.FailureLinkNode;
            }

            child.FailureLinkNode = target ?? _root;

            if (child.IsEndOfPattern)
            {
                // A pattern ends exactly here; it is longer than any suffix match.
                _longestMatch[child] = child;
            }
            else if (child.FailureLinkNode.IsEndOfPattern)
            {
                // A shorter pattern ends here as a suffix: flag the node and remember which
                // pattern it is, so Replace knows its length and replacement.
                child.IsEndOfPattern = true;
                _longestMatch[child] = _longestMatch[child.FailureLinkNode];
            }

            queue.Enqueue(child);
        }
    }
}
42 changes: 42 additions & 0 deletions src/Mnemonic.AhoCorasick/Extensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace Mnemonic.AhoCorasick;

/// <summary>Dictionary and string helper extensions.</summary>
public static class Extensions
{
    /// <summary>
    /// Ensures <paramref name="key"/> has an entry in <paramref name="dictionary"/>; when it is
    /// missing, a value created with the parameterless constructor is stored. An existing value
    /// is left untouched and no new value is constructed.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static void AddIfNotExists<TKey, TValue>(this Dictionary<TKey, TValue> dictionary, TKey key) where TValue : new()
    {
        // Single lookup: the slot is created (holding default) if the key was absent.
        ref var slot = ref CollectionsMarshal.GetValueRefOrAddDefault(dictionary, key, out var alreadyPresent);
        if (alreadyPresent)
        {
            return;
        }

        slot = new TValue();
    }

    /// <summary>
    /// Stores <paramref name="value"/> under <paramref name="key"/> only when the key has no
    /// entry yet; an existing value is left untouched.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static void AddIfNotExists<TKey, TValue>(this Dictionary<TKey, TValue> dictionary, TKey key, TValue value)
    {
        // Single lookup: the slot is created (holding default) if the key was absent.
        ref var slot = ref CollectionsMarshal.GetValueRefOrAddDefault(dictionary, key, out var alreadyPresent);
        if (alreadyPresent)
        {
            return;
        }

        slot = value;
    }

    /// <summary>
    /// Computes a hash code from the characters of <paramref name="str"/> using fixed integer
    /// arithmetic only. Hashing stops at the first <c>'\0'</c> character.
    /// </summary>
    /// <param name="str">The string to hash.</param>
    /// <returns>The combined hash of the even-indexed and odd-indexed characters.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static int GetStableHashCode(this string str)
    {
        unchecked
        {
            var evenHash = 5381;
            var oddHash = evenHash;
            var index = 0;

            // Characters are consumed in pairs: even positions feed evenHash, odd positions oddHash.
            while (index < str.Length)
            {
                var first = str[index];
                if (first == '\0')
                {
                    break;
                }

                evenHash = ((evenHash << 5) + evenHash) ^ first;

                var following = index + 1;
                if (following >= str.Length)
                {
                    break;
                }

                var second = str[following];
                if (second == '\0')
                {
                    break;
                }

                oddHash = ((oddHash << 5) + oddHash) ^ second;
                index += 2;
            }

            return evenHash + (oddHash * 1566083941);
        }
    }
}
14 changes: 14 additions & 0 deletions src/Mnemonic.AhoCorasick/TrieNode.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
using System.Collections.Generic;
using System.Diagnostics;

namespace Mnemonic.AhoCorasick;

/// <summary>
/// One node of the pattern trie: the character that leads to it, its outgoing edges,
/// its failure link, and the replacement attached when a pattern ends at it.
/// </summary>
[DebuggerDisplay("{Replacement}", Name = "{Key}")]
public sealed record TrieNode
{
    /// <summary>The character on the edge leading into this node.</summary>
    public char Key { get; set; }

    /// <summary>Child nodes, keyed by the character on the edge leading to each child.</summary>
    public Dictionary<char, TrieNode> Children { get; } = new();

    /// <summary>The node to continue from when no child matches the next character; <c>null</c> until assigned.</summary>
    public TrieNode FailureLinkNode { get; set; }

    /// <summary>Whether a pattern is considered to end at this node.</summary>
    public bool IsEndOfPattern { get; set; }

    /// <summary>The text to emit for the pattern ending at this node; <c>null</c> when none was assigned.</summary>
    public string Replacement { get; set; }
}

0 comments on commit 0b61bf9

Please sign in to comment.