Each table row shows performance measurements for this Nuitka program with a particular command-line input value N.
N | CPU secs | Elapsed secs | Memory KB | Code B | ≈ CPU Load |
---|---|---|---|---|---|
10,000 | 0.12 | 0.07 | ? | 2011 | 29% 17% 0% 0% 86% 14% 14% 29% |
Read the ↓ make, command line, and program output logs to see how this program was run.
Read k-nucleotide benchmark to see what this program should do.
# The Computer Language Benchmarks Game # http://benchmarksgame.alioth.debian.org/ # # submitted by Joerg Baumann from os import cpu_count from sys import stdin from collections import defaultdict from itertools import starmap, chain from multiprocessing import Pool lean_buffer = {} def lean_args(sequence, reading_frames, i, j): global lean_buffer lean_key = len(lean_buffer) lean_buffer[lean_key] = sequence return lean_key, reading_frames, i, j class lean_call: def __init__(self, func): self.func = func def __call__(self, lean_key, reading_frames, i, j): global lean_buffer sequence = lean_buffer[lean_key] results = self.func(sequence, reading_frames, i, j) lean_results = [] for frame, n, frequences in results: lean_frequences = defaultdict(int) for reading_frame, bits_list in reading_frames: if reading_frame == frame: for bits in bits_list: lean_frequences[bits] = frequences[bits] lean_results.append((frame, n, lean_frequences)) return lean_results def count_frequencies(sequence, reading_frames, i, j): frames = tuple( sorted([frame for frame,_ in reading_frames], reverse=True)) frequences_mask_list = tuple( ((defaultdict(int), (1 << (2 * frame)) - 1) for frame in frames)) frame = frames[0] frequences, mask = frequences_mask_list[0] short_frame_frequences = frequences_mask_list[1:] mono_nucleotides = [] frame_tail = len(frames) - 1 if frame_tail >= 0 and frames[frame_tail] == 1: freq = frequences_mask_list[frame_tail][0] worklist = sequence[i:j] len_before = len(worklist) while len_before > 0: n = worklist[0:1] worklist = worklist.translate(None, n) len_after = len(worklist) freq[n[0]] = len_before - len_after len_before = len_after mono_nucleotides.append(n) frame_tail -= 1 if frame_tail >= 0 and frames[frame_tail] == 2 and mono_nucleotides: freq = frequences_mask_list[frame_tail][0] worklist = sequence[i:min(j+1, len(sequence))] overlaps = [] for v in (n + m for n in mono_nucleotides for m in mono_nucleotides): bits = v[0]*4+v[1] freq[bits] = worklist.count(v) if v[1:] == v[:1]: overlaps.append((v, bits, v[:1]+v)) for v, bits, pattern in overlaps: count = len(worklist) tmp = worklist.replace(pattern+pattern, b'12') tmp = tmp.replace(pattern, b'1') count = (count - len(tmp)) // 2 count += tmp.count(b'1'+v) count += tmp.count(b'2'+v[:1]) freq[bits] += count frame_tail -= 1 short_frame_frequences = short_frame_frequences[:frame_tail] if len(short_frame_frequences): bits = 0 if i == 0: for k in range(i, i + frame - 1): bits = bits * 4 + sequence[k] for t, (f, m) in enumerate(short_frame_frequences, 1): if k - i + 1 >= frames[t]: f[bits & m] += 1 else: for k in range(i - frame + 1, i): bits = bits * 4 + sequence[k] for byte in sequence[k+1:j]: bits = (bits * 4 + byte) & mask frequences[bits] += 1 for f, m in short_frame_frequences: f[bits & m] += 1 return [ (frame, len(sequence) - frame + 1, frequences_mask_list[i][0]) for i, frame in enumerate(frames)] def read_sequence(file, header, translation) : for line in file: if line[0] == ord('>'): if line[1:len(header)+1] == header: break sequence = bytearray() for line in file: if line[0] == ord('>'): break sequence += line return sequence.translate(translation, b'\n\r\t ') def lookup_frequency(results, frame, bits): n = 1 frequency = 0 for _, n, frequencies in filter(lambda r: r[0] == frame, results): frequency += frequencies[bits] return frequency, n if n > 0 else 1 def display(results, display_list, sort=False, relative=False, end='\n'): lines = [ (k_nucleotide, lookup_frequency(results, frame, bits)) for k_nucleotide, frame, bits in display_list ] if sort: lines = sorted(lines, key=lambda v: (-v[1][0], v[0])) for k_nucleotide, (frequency, n) in lines: if relative: print("{0} {1:.3f}".format(k_nucleotide, frequency * 100. / n)) else: print("{1}\t{0}".format(k_nucleotide, frequency)) print(end=end) def main(): translation = bytes.maketrans(b'GTCAgtca', b'\x00\x01\x02\x03\x00\x01\x02\x03') def str_to_bits(text): buffer = text.encode('latin1').translate(translation) bits = 0 for k in range(len(buffer)): bits = bits * 4 + buffer[k] return bits def display_list(k_nucleotides): return [(n, len(n), str_to_bits(n)) for n in k_nucleotides] sequence = read_sequence(stdin.buffer, b'THREE', translation) mono_nucleotides = ('G', 'A', 'T', 'C') di_nucleotides = tuple(n + m for n in mono_nucleotides for m in mono_nucleotides) k_nucleotides = ( 'GGT', 'GGTA', 'GGTATT', 'GGTATTTTAATT', 'GGTATTTTAATTTATAGT') reading_frames = [ (1, tuple(map(str_to_bits, mono_nucleotides))), (2, tuple(map(str_to_bits, di_nucleotides))), ] + list(map(lambda s: (len(s), (str_to_bits(s),)), k_nucleotides)) if len(sequence) > 128 * cpu_count(): n = cpu_count() else: n = 1 partitions = [len(sequence) * i // n for i in range(n+1)] count_jobs = [ (sequence, reading_frames, partitions[i], partitions[i + 1]) for i in range(len(partitions) - 1)] if n == 1: results = list(chain(*starmap(count_frequencies, count_jobs))) else: lean_jobs = list(starmap(lean_args, count_jobs)) with Pool() as pool: async_results = pool.starmap_async( lean_call(count_frequencies), lean_jobs) results = list(chain(*async_results.get())) display(results, display_list(mono_nucleotides), relative=True, sort=True) display(results, display_list(di_nucleotides), relative=True, sort=True) display(results, display_list(k_nucleotides), end='') if __name__=='__main__' : main()
Sun, 23 Apr 2023 09:14:36 GMT MAKE: make[1]: Vstupuje se do adresáře „/home/dundee/work/pybenchmarks/bencher/tmp/knucleotide/tmp“ nuitka3 --remove-output knucleotide.nuitka-3.nuitka Nuitka-Options:INFO: Used command line options: --remove-output knucleotide.nuitka-3.nuitka Nuitka-Options:WARNING: You did not specify to follow or include anything but main program. Check options and make sure that is intended. Nuitka:INFO: Starting Python compilation with Nuitka '1.5' on Python '3.10' commercial grade 'not installed'. Nuitka-Plugins:INFO: multiprocessing: Injecting pre-module load code for module 'multiprocessing': Nuitka-Plugins:INFO: multiprocessing: Monkey patching "multiprocessing" load environment. Nuitka-Plugins:INFO: multiprocessing: Injecting post-module load code for module 'multiprocessing': Nuitka-Plugins:INFO: multiprocessing: Monkey patching "multiprocessing" for compiled methods. Nuitka:INFO: Completed Python level compilation and optimization. Nuitka:INFO: Generating source code for C backend compiler. Nuitka:INFO: Running data composer tool for optimal constant value handling. Nuitka:INFO: Running C compilation via Scons. Nuitka-Scons:INFO: Backend C compiler: gcc (gcc). Nuitka-Scons:INFO: Backend linking program with 10 files (no progress information available). Nuitka-Scons:WARNING: You are not using ccache. Nuitka:INFO: Removing build directory 'knucleotide.nuitka-3.nuitka.build'. Nuitka:INFO: Successfully created 'knucleotide.nuitka-3.nuitka.bin'. cp knucleotide.nuitka-3.nuitka.bin knucleotide.nuitka-3.nuitka_run make[1]: Opouští se adresář „/home/dundee/work/pybenchmarks/bencher/tmp/knucleotide/tmp“ 20.35s to complete and log all make actions COMMAND LINE: ./knucleotide.nuitka-3.nuitka_run 0 < knucleotide-input10000.txt PROGRAM OUTPUT: A 30.284 T 29.796 C 20.312 G 19.608 AA 9.212 AT 8.950 TT 8.948 TA 8.936 CA 6.166 CT 6.100 AC 6.086 TC 6.042 AG 6.036 GA 5.968 TG 5.868 GT 5.798 CC 4.140 GC 4.044 CG 3.906 GG 3.798 562 GGT 152 GGTA 15 GGTATT 0 GGTATTTTAATT 0 GGTATTTTAATTTATAGT