先贴源码地址
https://github.com/dotnet/corefx/tree/master/src/System.Linq/src
.NET CORE很大一个好处就是代码的开源,你可以详细的查看你使用类的源代码,并学习微软的写法和实现思路。我们这个系列熟悉基本类库是一个目的,另一个目的就是学习微软的实现思路和编程方法。
今天我们就单独讨论的问题是linq中的distinct方法是如何实现。最后还会有我们实际编程时候对distinct方法的扩展。
System.Linq
linq中Distinct方法在Enumerable类中
Enumerable
public static partial class Enumerable
内部去重方法实现有2个重载
1
public static IEnumerableDistinct (this IEnumerable source) => Distinct(source, null);
2
public static IEnumerableDistinct (this IEnumerable source, IEqualityComparer comparer){ if (source == null) { throw Error.ArgumentNull(nameof(source)); } return new DistinctIterator (source, comparer);}
去重迭代器DistinctIterator
private sealed class DistinctIterator
去重迭代器,先把元素都加到Set<TSource> _set;中,然后用set的UnionWith去重
这里的set是内部实现的一个轻量级的hash set 具体代码下一部分介绍
////// An iterator that yields the distinct values in an ///./// The type of the source enumerable. private sealed class DistinctIterator: Iterator , IIListProvider { private readonly IEnumerable _source; private readonly IEqualityComparer _comparer; private Set _set;private IEnumerator _enumerator; public DistinctIterator(IEnumerable source, IEqualityComparer comparer) { Debug.Assert(source != null); _source = source; _comparer = comparer; }public override Iterator Clone() => new DistinctIterator (_source, _comparer); public override bool MoveNext() { switch (_state) { case 1: _enumerator = _source.GetEnumerator(); if (!_enumerator.MoveNext()) { Dispose(); return false; } TSource element = _enumerator.Current; _set = new Set (_comparer); _set.Add(element); _current = element; _state = 2; return true; case 2: while (_enumerator.MoveNext()) { element = _enumerator.Current; if (_set.Add(element)) { _current = element; return true; } } break; } Dispose(); return false; } public override void Dispose() { if (_enumerator != null) { _enumerator.Dispose(); _enumerator = null; _set = null; } base.Dispose(); } private Set FillSet() { Set set = new Set (_comparer); set.UnionWith(_source); return set; } public TSource[] ToArray() => FillSet().ToArray(); public List ToList() => FillSet().ToList(); public int GetCount(bool onlyIfCheap) => onlyIfCheap ? -1 : FillSet().Count;}
Set的UnionWith
这部分其实是distinct实现的重点,所以内容较多。
////// A lightweight hash set./// ///The type of the set's items. internal sealed class Set{变量 /// /// The comparer used to hash and compare items in the set. /// private readonly IEqualityComparer_comparer; /// /// The hash buckets, which are used to index into the slots. /// private int[] _buckets; ////// The slots, each of which store an item and its hash code. /// private Slot[] _slots; ////// An entry in the hash set./// private struct Slot { ////// The hash code of the item. /// internal int _hashCode; ////// In the case of a hash collision(碰撞), the index of the next slot to probe(查看). /// internal int _next; ////// The item held by this slot. /// internal TElement _value; } ////// The number of items in this set. /// private int _count;构造函数 ////// Constructs a set that compares items with the specified comparer. /// /// /// The comparer. If this isnull , it defaults to. /// public Set(IEqualityComparer comparer) { _comparer = comparer ?? EqualityComparer .Default; _buckets = new int[7]; _slots = new Slot[7]; }新增方法 /// /// Attempts to add an item to this set. /// /// The item to add. ////// public bool Add(TElement value) {//根据值获取哈希值 最终调用的是_comparer.GetHashCode(value) int hashCode = InternalGetHashCode(value);//遍历对比值 直接找到对应的桶,遍历桶中的元素 _slots[i]._next最后一个值会是-1,所以会跳出循环 for (int i = _buckets[hashCode % _buckets.Length] - 1; i >= 0; i = _slots[i]._next) { if (_slots[i]._hashCode == hashCode && _comparer.Equals(_slots[i]._value, value)) { return false; } }//如果超出长度,就扩展 乘以2加1 if (_count == _slots.Length) { Resize(); } int index = _count; _count++; int bucket = hashCode % _buckets.Length;//这里具体桶的位置需要除以总体长度,这样空间利用率更好 _slots[index]._hashCode = hashCode; _slots[index]._value = value; _slots[index]._next = _buckets[bucket] - 1;//桶中前一个元素的位置索引 _buckets[bucket] = index + 1; return true; }去除方法 ///true if the item was not in the set; otherwise,false . ////// Attempts to remove an item from this set. /// /// The item to remove. ////// public bool Remove(TElement value) { int hashCode = InternalGetHashCode(value); int bucket = hashCode % _buckets.Length; int last = -1; for (int i = _buckets[bucket] - 1; i >= 0; last = i, i = _slots[i]._next) { if (_slots[i]._hashCode == hashCode && _comparer.Equals(_slots[i]._value, value)) { if (last < 0) { _buckets[bucket] = _slots[i]._next + 1; } else { _slots[last]._next = _slots[i]._next; } _slots[i]._hashCode = -1; _slots[i]._value = default(TElement); _slots[i]._next = -1; return true; } } return false; }扩展set ///true if the item was in the set; otherwise,false . ////// Expands the capacity of this set to double the current capacity, plus one. /// private void Resize() { int newSize = checked((_count * 2) + 1);//这个要检测是否超出int长度限制 int[] newBuckets = new int[newSize]; Slot[] newSlots = new Slot[newSize]; Array.Copy(_slots, 0, newSlots, 0, _count);//赋值newSlots数组 for (int i = 0; i < _count; i++) { int bucket = newSlots[i]._hashCode % newSize; newSlots[i]._next = newBuckets[bucket] - 1;//重新记录桶位置 newBuckets[bucket] = i + 1; } _buckets = newBuckets; _slots = newSlots; } ////// Creates an array from the items in this set. /// ///An array of the items in this set. public TElement[] ToArray() { TElement[] array = new TElement[_count]; for (int i = 0; i != array.Length; ++i) { array[i] = _slots[i]._value; } return array; } ////// Creates a list from the items in this set. /// ///A list of the items in this set. public ListToList() { int count = _count; List list = new List (count); for (int i = 0; i != count; ++i) { list.Add(_slots[i]._value); } return list; }UnionWith方法,实际是执行add /// /// The number of items in this set. /// public int Count => _count; ////// Unions this set with an enumerable. /// /// The enumerable. public void UnionWith(IEnumerableother) { Debug.Assert(other != null); foreach (TElement item in other) { Add(item); } }内部哈希方法 /// /// Gets the hash code of the provided value with its sign bit zeroed out, so that modulo has a positive result. /// /// The value to hash. ///The lower 31 bits of the value's hash code. private int InternalGetHashCode(TElement value) => value == null ? 0 : _comparer.GetHashCode(value) & 0x7FFFFFFF;}
扩展distinct的关键
实现IEqualityComparer接口
public interface IEqualityComparer{ // true if the specified objects are equal; otherwise, false. bool Equals(T x, T y); // Returns a hash code for the specified object. // 异常: // T:System.ArgumentNullException: // The type of obj is a reference type and obj is null. int GetHashCode(T obj);}
distinct扩展方法
使用params,支持多字段。
public static class ComparerHelper{ ////// 自定义Distinct扩展方法 /// ///要去重的对象类 /// 要去重的对象 /// 获取自定义去重字段的委托 ///public static IEnumerable DistinctEx (this IEnumerable source, params Func [] getfield) { return source.Distinct(new CompareEntityFields (getfield)); }}public class CompareEntityFields : IEqualityComparer { private readonly Func [] _compareFields; /// /// 可以根据字段比对数据 /// /// 比对字段引用 public CompareEntityFields(params Func[] compareFields) { _compareFields = compareFields; } /// Determines whether the specified objects are equal. /// The first object of type T to compare. /// The second object of type T to compare. ///true if the specified objects are equal; otherwise, false. bool IEqualityComparer.Equals(T x, T y) { if (_compareFields == null || _compareFields.Length <= 0) { return EqualityComparer .Default.Equals(x, y); } bool result = true; foreach (var func in _compareFields) { var xv = func(x); var yv = func(y); result = xv == null && yv == null || Equals(xv, yv); if (!result) break; } return result; } /// Returns a hash code for the specified object. /// Thefor which a hash code is to be returned. /// A hash code for the specified object. ////// The type of int IEqualityComparerobj is a reference type /// andobj is null. ///.GetHashCode(T obj) { return obj.ToString().GetHashCode(); }}