Segmential search
Segmential search
It's something close to segments tree, but a list is used (+binary search on it) instead of a tree.
It requires ''input data (points) to be sorted''.
So, the idea is: instead of search a point in a list of points, combine them into segments (intervals/ranges) - we assume that there are consecutive points (like 1,2,3,4 - they can be combined into 1..4). A sequence of points like 1, 5, 7 are combined into segments 1..1, 5..5, 7..7 - not good case, so the algorithm makes sense when segments (which length > 1) number is big.
We find the segment contained the point with binary search. Then we find the offset of the point inside the segment (its index) - without any search, just arithmetic. Then the offset of the segment - again, just arithmetic. So, it's faster than binary search on plain points.
import random import functools def swap(x, y): return (y, x) @functools.total_ordering class Seg: '''Normalized segment''' def __init__(self, a, b=None): if b is None: b = a if a > b: a, b = swap(a, b) self.a = a self.b = b def __contains__(self, pt): return self.a <= pt <= self.b def __len__(self): return self.b - self.a + 1 def __eq__(self, oth): assert type(oth) is type(self) self.a == oth.a and self.b == oth.b def __lt__(self, oth): assert type(oth) is type(self) return self.a < oth.a def __repr__(self): return f'{self.a}..{self.b}' def cmp_pt(self, pt): if pt < self.a: return 1 elif pt > self.b: return -1 else: return 0 def segs_from_points(pts): seg = None d = 0 for pt in pts: if seg is None: seg = Seg(pt) else: d = pt - seg.b if 0 > d: raise ValueError('Unordered points') elif 0 == d: pass elif 1 == d: seg.b += 1 elif 1 < d: yield seg seg = Seg(pt) # Usual binary search but of the point in segments, # so cmp_pt() is used def find_seg(segs, pt): start = 0 end = len(segs) - 1 while start <= end: mid = (start + end) // 2 mid_el = segs[mid] d = mid_el.cmp_pt(pt) if d > 0: end = mid - 1 elif d < 0: start = mid + 1 else: return mid return -1 def find(segs, pt): segi = find_seg(ACCESS(segs, _cap='seg'), pt) if segi >= 0: seg = segs[segi] print(f"Found seg: {seg}") off_in_seg = pt - seg.a fnd = 0 for i in range(segi): fnd += len(segs[i]) fnd += off_in_seg return fnd else: return -1 def partiallyrandomdata(num, ratio=0.5): res = list(range(num)) i = 0 cnt = min(num, num*ratio) while i < cnt: r = random.randint(num, num + cnt - 1) res[r - num] = r i += 1 res = sorted(res) return (res, random.choice(res)) class ACCESS(list): def __init__(self, *a, **kw): self.cap = kw.pop('_cap', '') if self.cap: self.cap += ' ' super().__init__(*a, **kw) self._access = 0 def __getitem__(self, i): print(f"{self.cap}{self._access}: get {i}") self._access += 1 return super().__getitem__(i) NUM = 10000 arr, x = partiallyrandomdata(NUM, 0.5) segs = list(segs_from_points(arr)) print(f"Look for {x} in {arr}") print(f"Segs: {segs}") fnd = find(segs, x) print(f"Looked for {x} and found {arr[fnd]} at {fnd}")