Skip to content

Commit 6581182

Browse files
Add protein-translation exercise (#84)
1 parent 53d1906 commit 6581182

File tree

8 files changed

+408
-0
lines changed

8 files changed

+408
-0
lines changed

config.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,14 @@
321321
"prerequisites": [],
322322
"difficulty": 4
323323
},
324+
{
325+
"slug": "protein-translation",
326+
"name": "Protein Translation",
327+
"uuid": "546929bd-ff42-498e-87dc-b50b07e2803b",
328+
"practices": [],
329+
"prerequisites": [],
330+
"difficulty": 4
331+
},
324332
{
325333
"slug": "queen-attack",
326334
"name": "Queen Attack",
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Instructions
2+
3+
Your job is to translate RNA sequences into proteins.
4+
5+
RNA strands are made up of three-nucleotide sequences called **codons**.
6+
Each codon translates to an **amino acid**.
7+
When joined together, those amino acids make a protein.
8+
9+
In the real world, there are 64 codons, which in turn correspond to 20 amino acids.
10+
However, for this exercise, you’ll only use a few of the possible 64.
11+
They are listed below:
12+
13+
| Codon | Amino Acid |
14+
| ------------------ | ------------- |
15+
| AUG | Methionine |
16+
| UUU, UUC | Phenylalanine |
17+
| UUA, UUG | Leucine |
18+
| UCU, UCC, UCA, UCG | Serine |
19+
| UAU, UAC | Tyrosine |
20+
| UGU, UGC | Cysteine |
21+
| UGG | Tryptophan |
22+
| UAA, UAG, UGA | STOP |
23+
24+
For example, the RNA string “AUGUUUUCU” has three codons: “AUG”, “UUU” and “UCU”.
25+
These map to Methionine, Phenylalanine, and Serine.
26+
27+
## “STOP” Codons
28+
29+
You’ll note from the table above that there are three **“STOP” codons**.
30+
If you encounter any of these codons, ignore the rest of the sequence — the protein is complete.
31+
32+
For example, “AUGUUUUCUUAAAUG” contains a STOP codon (“UAA”).
33+
Once we reach that point, we stop processing.
34+
We therefore only consider the part before it (i.e. “AUGUUUUCU”), not any further codons after it (i.e. “AUG”).
35+
36+
Learn more about [protein translation on Wikipedia][protein-translation].
37+
38+
[protein-translation]: https://en.wikipedia.org/wiki/Translation_(biology)
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"authors": [
3+
"keiravillekode"
4+
],
5+
"files": {
6+
"solution": [
7+
"protein_translation.fut"
8+
],
9+
"test": [
10+
"test.fut"
11+
],
12+
"example": [
13+
".meta/example.fut"
14+
]
15+
},
16+
"blurb": "Translate RNA sequences into proteins.",
17+
"source": "Tyler Long"
18+
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
-- | The amino acids that a codon can be translated to in this exercise.
type amino_acid = #methionine | #phenylalanine | #leucine | #serine | #tyrosine | #cysteine | #tryptophan
2+
3+
-- | Whether two byte strings have the same length and identical contents.
local def equal [m] [n] (first: [m]u8) (second: [n]u8): bool =
  -- Only compare element-wise once the lengths are known to agree, so
  -- every index is in bounds for both arrays.
  if m == n
  then all (\i -> first[i] == second[i]) (iota m)
  else false
9+
10+
-- | Whether `needle` equals at least one of the strings in `haystack`.
local def contains (needle: []u8) (haystack: [][]u8): bool =
  any (\candidate -> equal candidate needle) haystack
15+
16+
-- | Whether `codon` is one of the three STOP codons.
local def is_stop (codon: []u8): bool =
  let stop_codons = ["UAA", "UAG", "UGA"]
  in contains codon stop_codons
18+
19+
-- | Map a single codon to the amino acid it encodes.
--
-- Fails an assertion when the codon matches none of the codons listed
-- below.
local def translate (codon: []u8): amino_acid =
  -- Shorthand: is the codon a member of the given candidate list?
  let among (candidates: [][]u8): bool = contains codon candidates
  in if among ["AUG"] then #methionine
     else if among ["UUU", "UUC"] then #phenylalanine
     else if among ["UUA", "UUG"] then #leucine
     else if among ["UCU", "UCC", "UCA", "UCG"] then #serine
     else if among ["UAU", "UAC"] then #tyrosine
     else if among ["UGU", "UGC"] then #cysteine
     else assert (among ["UGG"]) #tryptophan
27+
28+
-- | Translate an RNA strand into its sequence of amino acids.
--
-- The strand is read three bytes at a time.  Translation ends at the
-- first STOP codon or at the end of the strand.  An assertion fails if
-- a codon would run past the end of the strand before a STOP codon is
-- seen.
def proteins (strand: []u8): []amino_acid =
  let total = length strand
  let (acids, _) =
    loop (acids, start) = ([], 0) while start < total do
      let stop = start + 3
      let codon = assert (stop <= total) strand[start:stop]
      in if is_stop codon
         -- Jump the cursor to the end so the loop terminates.
         then (acids, total)
         else (acids ++ [translate codon], stop)
  in acids
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# This is an auto-generated file.
2+
#
3+
# Regenerating this file via `configlet sync` will:
4+
# - Recreate every `description` key/value pair
5+
# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications
6+
# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion)
7+
# - Preserve any other key/value pair
8+
#
9+
# As user-added comments (using the # character) will be removed when this file
10+
# is regenerated, comments can be added via a `comment` key.
11+
12+
[2c44f7bf-ba20-43f7-a3bf-f2219c0c3f98]
13+
description = "Empty RNA sequence results in no proteins"
14+
15+
[96d3d44f-34a2-4db4-84cd-fff523e069be]
16+
description = "Methionine RNA sequence"
17+
18+
[1b4c56d8-d69f-44eb-be0e-7b17546143d9]
19+
description = "Phenylalanine RNA sequence 1"
20+
21+
[81b53646-bd57-4732-b2cb-6b1880e36d11]
22+
description = "Phenylalanine RNA sequence 2"
23+
24+
[42f69d4f-19d2-4d2c-a8b0-f0ae9ee1b6b4]
25+
description = "Leucine RNA sequence 1"
26+
27+
[ac5edadd-08ed-40a3-b2b9-d82bb50424c4]
28+
description = "Leucine RNA sequence 2"
29+
30+
[8bc36e22-f984-44c3-9f6b-ee5d4e73f120]
31+
description = "Serine RNA sequence 1"
32+
33+
[5c3fa5da-4268-44e5-9f4b-f016ccf90131]
34+
description = "Serine RNA sequence 2"
35+
36+
[00579891-b594-42b4-96dc-7ff8bf519606]
37+
description = "Serine RNA sequence 3"
38+
39+
[08c61c3b-fa34-4950-8c4a-133945570ef6]
40+
description = "Serine RNA sequence 4"
41+
42+
[54e1e7d8-63c0-456d-91d2-062c72f8eef5]
43+
description = "Tyrosine RNA sequence 1"
44+
45+
[47bcfba2-9d72-46ad-bbce-22f7666b7eb1]
46+
description = "Tyrosine RNA sequence 2"
47+
48+
[3a691829-fe72-43a7-8c8e-1bd083163f72]
49+
description = "Cysteine RNA sequence 1"
50+
51+
[1b6f8a26-ca2f-43b8-8262-3ee446021767]
52+
description = "Cysteine RNA sequence 2"
53+
54+
[1e91c1eb-02c0-48a0-9e35-168ad0cb5f39]
55+
description = "Tryptophan RNA sequence"
56+
57+
[e547af0b-aeab-49c7-9f13-801773a73557]
58+
description = "STOP codon RNA sequence 1"
59+
60+
[67640947-ff02-4f23-a2ef-816f8a2ba72e]
61+
description = "STOP codon RNA sequence 2"
62+
63+
[9c2ad527-ebc9-4ace-808b-2b6447cb54cb]
64+
description = "STOP codon RNA sequence 3"
65+
66+
[f4d9d8ee-00a8-47bf-a1e3-1641d4428e54]
67+
description = "Sequence of two protein codons translates into proteins"
68+
69+
[dd22eef3-b4f1-4ad6-bb0b-27093c090a9d]
70+
description = "Sequence of two different protein codons translates into proteins"
71+
72+
[d0f295df-fb70-425c-946c-ec2ec185388e]
73+
description = "Translate RNA strand into correct protein list"
74+
75+
[e30e8505-97ec-4e5f-a73e-5726a1faa1f4]
76+
description = "Translation stops if STOP codon at beginning of sequence"
77+
78+
[5358a20b-6f4c-4893-bce4-f929001710f3]
79+
description = "Translation stops if STOP codon at end of two-codon sequence"
80+
81+
[ba16703a-1a55-482f-bb07-b21eef5093a3]
82+
description = "Translation stops if STOP codon at end of three-codon sequence"
83+
84+
[4089bb5a-d5b4-4e71-b79e-b8d1f14a2911]
85+
description = "Translation stops if STOP codon in middle of three-codon sequence"
86+
87+
[2c2a2a60-401f-4a80-b977-e0715b23b93d]
88+
description = "Translation stops if STOP codon in middle of six-codon sequence"
89+
90+
[f6f92714-769f-4187-9524-e353e8a41a80]
91+
description = "Sequence of two non-STOP codons does not translate to a STOP codon"
92+
93+
[1e75ea2a-f907-4994-ae5c-118632a1cb0f]
94+
description = "Non-existing codon can't translate"
95+
include = false
96+
97+
[9eac93f3-627a-4c90-8653-6d0a0595bc6f]
98+
description = "Unknown amino acids, not part of a codon, can't translate"
99+
reimplements = "1e75ea2a-f907-4994-ae5c-118632a1cb0f"
100+
101+
[9d73899f-e68e-4291-b1e2-7bf87c00f024]
102+
description = "Incomplete RNA sequence can't translate"
103+
104+
[43945cf7-9968-402d-ab9f-b8a28750b050]
105+
description = "Incomplete RNA sequence can translate if valid until a STOP codon"
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
-- | The amino acids that a codon can be translated to in this exercise.
type amino_acid = #methionine | #phenylalanine | #leucine | #serine | #tyrosine | #cysteine | #tryptophan
2+
3+
-- | Translate an RNA strand into a sequence of amino acids.
-- The body is a `???` placeholder hole, left to be filled in.
def proteins (strand: []u8): []amino_acid = ???
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
import "protein_translation"
2+
3+
-- Empty RNA sequence results in no proteins
4+
-- ==
5+
-- input { "" }
6+
-- output { empty([0][13]u8) }
7+
8+
-- Methionine RNA sequence
9+
-- ==
10+
-- input { "AUG" }
11+
-- output { ["Methionine   "] }
12+
13+
-- Phenylalanine RNA sequence 1
14+
-- ==
15+
-- input { "UUU" }
16+
-- output { ["Phenylalanine"] }
17+
18+
-- Phenylalanine RNA sequence 2
19+
-- ==
20+
-- input { "UUC" }
21+
-- output { ["Phenylalanine"] }
22+
23+
-- Leucine RNA sequence 1
24+
-- ==
25+
-- input { "UUA" }
26+
-- output { ["Leucine      "] }
27+
28+
-- Leucine RNA sequence 2
29+
-- ==
30+
-- input { "UUG" }
31+
-- output { ["Leucine      "] }
32+
33+
-- Serine RNA sequence 1
34+
-- ==
35+
-- input { "UCU" }
36+
-- output { ["Serine       "] }
37+
38+
-- Serine RNA sequence 2
39+
-- ==
40+
-- input { "UCC" }
41+
-- output { ["Serine       "] }
42+
43+
-- Serine RNA sequence 3
44+
-- ==
45+
-- input { "UCA" }
46+
-- output { ["Serine       "] }
47+
48+
-- Serine RNA sequence 4
49+
-- ==
50+
-- input { "UCG" }
51+
-- output { ["Serine       "] }
52+
53+
-- Tyrosine RNA sequence 1
54+
-- ==
55+
-- input { "UAU" }
56+
-- output { ["Tyrosine     "] }
57+
58+
-- Tyrosine RNA sequence 2
59+
-- ==
60+
-- input { "UAC" }
61+
-- output { ["Tyrosine     "] }
62+
63+
-- Cysteine RNA sequence 1
64+
-- ==
65+
-- input { "UGU" }
66+
-- output { ["Cysteine     "] }
67+
68+
-- Cysteine RNA sequence 2
69+
-- ==
70+
-- input { "UGC" }
71+
-- output { ["Cysteine     "] }
72+
73+
-- Tryptophan RNA sequence
74+
-- ==
75+
-- input { "UGG" }
76+
-- output { ["Tryptophan   "] }
77+
78+
-- STOP codon RNA sequence 1
79+
-- ==
80+
-- input { "UAA" }
81+
-- output { empty([0][13]u8) }
82+
83+
-- STOP codon RNA sequence 2
84+
-- ==
85+
-- input { "UAG" }
86+
-- output { empty([0][13]u8) }
87+
88+
-- STOP codon RNA sequence 3
89+
-- ==
90+
-- input { "UGA" }
91+
-- output { empty([0][13]u8) }
92+
93+
-- Sequence of two protein codons translates into proteins
94+
-- ==
95+
-- input { "UUUUUU" }
96+
-- output { ["Phenylalanine", "Phenylalanine"] }
97+
98+
-- Sequence of two different protein codons translates into proteins
99+
-- ==
100+
-- input { "UUAUUG" }
101+
-- output { ["Leucine      ", "Leucine      "] }
102+
103+
-- Translate RNA strand into correct protein list
104+
-- ==
105+
-- input { "AUGUUUUGG" }
106+
-- output { ["Methionine   ", "Phenylalanine", "Tryptophan   "] }
107+
108+
-- Translation stops if STOP codon at beginning of sequence
109+
-- ==
110+
-- input { "UAGUGG" }
111+
-- output { empty([0][13]u8) }
112+
113+
-- Translation stops if STOP codon at end of two-codon sequence
114+
-- ==
115+
-- input { "UGGUAG" }
116+
-- output { ["Tryptophan   "] }
117+
118+
-- Translation stops if STOP codon at end of three-codon sequence
119+
-- ==
120+
-- input { "AUGUUUUAA" }
121+
-- output { ["Methionine   ", "Phenylalanine"] }
122+
123+
-- Translation stops if STOP codon in middle of three-codon sequence
124+
-- ==
125+
-- input { "UGGUAGUGG" }
126+
-- output { ["Tryptophan   "] }
127+
128+
-- Translation stops if STOP codon in middle of six-codon sequence
129+
-- ==
130+
-- input { "UGGUGUUAUUAAUGGUUU" }
131+
-- output { ["Tryptophan   ", "Cysteine     ", "Tyrosine     "] }
132+
133+
-- Sequence of two non-STOP codons does not translate to a STOP codon
134+
-- ==
135+
-- input { "AUGAUG" }
136+
-- output { ["Methionine   ", "Methionine   "] }
137+
138+
-- Unknown amino acids, not part of a codon, can't translate
139+
-- ==
140+
-- input { "XYZ" }
141+
-- error: Error*
142+
143+
-- Incomplete RNA sequence can't translate
144+
-- ==
145+
-- input { "AUGU" }
146+
-- error: Error*
147+
148+
-- Incomplete RNA sequence can translate if valid until a STOP codon
149+
-- ==
150+
-- input { "UUCUUCUAAUGGU" }
151+
-- output { ["Phenylalanine", "Phenylalanine"] }
152+
153+
-- | The display name of an amino acid as a fixed-width byte string.
--
-- Every name is right-padded with spaces to exactly 13 bytes (the
-- length of "Phenylalanine") so that all branches have the declared
-- `[13]u8` type and the results can be collected into one regular
-- two-dimensional array.
local def name (a: amino_acid): [13]u8 =
  match a
  case #methionine    -> "Methionine   "
  case #phenylalanine -> "Phenylalanine"
  case #leucine       -> "Leucine      "
  case #serine        -> "Serine       "
  case #tyrosine      -> "Tyrosine     "
  case #cysteine      -> "Cysteine     "
  case #tryptophan    -> "Tryptophan   "
162+
163+
-- | Test entry point: translate `strand` and return the 13-byte name of
-- each resulting amino acid, one row per amino acid.
def main (strand: []u8): [][13]u8 =
  proteins strand |> map name

0 commit comments

Comments
 (0)