9.9){ return number_format($d); }
elseif ($d>0.0){ return number_format($d,1); }
else { return " "; }
}
// PAGE ////////////////////////////////////////////////
// Invoke the menu-form helper with the test list, the selected test key,
// the language list and NULL as the fourth argument.
// NOTE(review): presumably this emits the page's selection form — confirm against MkMenuForm.
MkMenuForm($Tests,$SelectedTest,$Langs,NULL);
// Look up the row for the selected test and copy out the name, tag and
// link fields under the TEST_* keys.
$Row = $Tests[$SelectedTest];
$TestName = $Row[TEST_NAME];
$TestTag = $Row[TEST_TAG];
$TestLink = $Row[TEST_LINK];
// Unpack the five positional entries of $Data into separate variables,
// then drop $Data so only the unpacked values remain in scope.
list($Succeeded,$Failed,$Special,$Labels,$Ratios) = $Data;
unset($Data);
// NOTE(review): $first is not read in the lines visible here — presumably used further down the page.
$first = 0;
// Workload caption: "N=<value>" taken from the first successful measurement
// that has a positive test value, or 'N=?' when there is none.
$NString = 'N=?';
// Keep the chosen value in its own variable instead of reading the foreach
// variable after the loop: $d is undefined when $Succeeded is empty, and
// otherwise it only holds whichever row the loop happened to stop on.
$testValue = 0.0;
foreach($Succeeded as $d){
   if ($d[DATA_TESTVALUE]>0){
      $testValue = (double)$d[DATA_TESTVALUE];
      $NString = 'N='.number_format($testValue);
      break;
   }
}
// BEWARE - Hard coded values - BEWARE
// For the four tests named below, prefix the caption with the approximate
// size label that goes with each known value of N. A value of 0.0 (no
// positive measurement found) matches none of the cases.
if ($TestName=='fasta'||$TestName=='k-nucleotide'||
    $TestName=='reverse-complement'||$TestName=='regex-dna'){
   if ($testValue == 25000000) { $NString = '≈240MB '.$NString; }
   elseif ($testValue == 2500000) { $NString = '≈24MB '.$NString; }
   elseif ($testValue == 5000000) { $NString = '≈50MB '.$NString; }
   elseif ($testValue == 1000000) { $NString = '≈10MB '.$NString; }
   elseif ($testValue == 500000) { $NString = '≈5MB '.$NString; }
}
// The startup test shows no workload caption at all.
if ($TestName=='startup'){ $NString = ''; }
// Column-header cells for the four measures. Each one starts out as the
// plain "sort" cell; the cell for the measure the rows are currently
// sorted by is then replaced with the variant that has no "sort" label.
$CPU_sort_td = $MEM_sort_td = $ELAPSED_sort_td = $GZBYTES_sort_td = '
sort | ';

// The active sort key also selects the two text fragments that introduce
// the chart. No fragment is assigned here for an unrecognised $Sort value.
switch ($Sort) {
   case 'fullcpu':
      $CPU_sort_td = '
| ';
      $Chart_intro_1 = '
how many times slower';
      $Chart_intro_2 = 'program was, compared to the fastest program';
      break;

   case 'kb':
      $MEM_sort_td = '
| ';
      $Chart_intro_1 = '
how many times more Memory';
      $Chart_intro_2 = 'program used, compared to the program that used least Memory';
      break;

   case 'elapsed':
      $ELAPSED_sort_td = '
| ';
      $Chart_intro_1 = '
how many times slower';
      $Chart_intro_2 = 'program was, compared to the fastest program';
      break;

   case 'gz':
      $GZBYTES_sort_td = '
| ';
      $Chart_intro_1 = '
how many times more Code';
      $Chart_intro_2 = 'program used, compared to the program that used least Code';
      break;
}

// Extra output emitted only when the page is flagged as canonical.
if ($CanonicalPage) {
   echo '
';
}
?>
Each chart bar shows , one ↓ .
These are not the only programs that could be written. These are not the only compilers and interpreters. These are not the only programming languages.
Column × shows how many times more each program used compared to the benchmark program that used least.
Diff your program's output for this 250KB input file (generated with the fasta program N = 25000) against this output file to check that your program is correct before contributing.
We are trying to show the performance of various programming language implementations - so we ask that contributed programs not only give the correct result, but also use the same algorithm to calculate that result.
We use FASTA files generated by the fasta benchmark as input for this benchmark. Note: the file may include both lowercase and uppercase codes.
Each program should
- read line-by-line a redirected FASTA format file from stdin
- extract DNA sequence THREE
- define a procedure/function to update a hashtable of k-nucleotide keys and count values, for a particular reading-frame — even though we'll combine k-nucleotide counts for all reading-frames (grow the hashtable from a small default size)
- use that procedure/function and hashtable to
- count all the 1-nucleotide and 2-nucleotide sequences, and write the code and percentage frequency, sorted by descending frequency and then ascending k-nucleotide key
- count all the 3- 4- 6- 12- and 18-nucleotide sequences, and write the count and code for the specific sequences GGT GGTA GGTATT GGTATTTTAATT GGTATTTTAATTTATAGT
In practice, a less brute-force approach would be used to calculate k-nucleotide frequencies; see, for example, Virus Classification using k-nucleotide Frequencies and A Fast Algorithm for the Exhaustive Analysis of 12-Nucleotide-Long DNA Sequences. Applications to Human Genomics (105KB pdf).