# Store a species name (string) and its genome size (integer) in variables,
# then report both in a single formatted message.
species_name = "Escherichia coli"
# Underscores in a numeric literal are only visual separators: 4_600_000 == 4600000.
genome_size = 4_600_000

print(f"The species {species_name} has a genome size of {genome_size} base pairs.")
The species Escherichia coli has a genome size of 4600000 base pairs.
# Store a species name (string) and its genome size (integer) in variables,
# then report both in a single formatted message.
species_name = "Escherichia coli"
# Underscores in a numeric literal are only visual separators: 4_600_000 == 4600000.
genome_size = 4_600_000

print(f"The species {species_name} has a genome size of {genome_size} base pairs.")
The species Escherichia coli has a genome size of 4600000 base pairs.
# Compute GC content as the fraction of G/C bases among all bases.
gc_count = 19_123
total_bases = 40_000

# True division (/) always yields a float, even for two integers.
gc_content = gc_count / total_bases

# The ":.2f" format spec rounds the displayed value to two decimal places.
print(f"GC content: {gc_content:.2f}")
GC content: 0.48
# Slice a string into two pieces and join them back together.
sequence = "ACTGGTCAA"
# The "{name=}" f-string form prints both the expression text and its repr.
print(f"{sequence=}")

# Slices are half-open: [0:4] takes indices 0-3, [4:9] takes indices 4-8.
first_four = sequence[0:4]
last_five = sequence[4:9]
print(f"{first_four=}; {last_five=}")

# Concatenating the two adjacent slices reproduces the original string.
combined = first_four + last_five
print(f"{combined=}")
sequence='ACTGGTCAA'
first_four='ACTG'; last_five='GTCAA'
combined='ACTGGTCAA'
# Branch on a single numeric comparison: scores of 30 or more pass.
quality_score = 28

if quality_score >= 30:
    print("Pass")
else:
    print("Fail")

# Branch on two boolean flags combined with `and`: both must be True
# for the "Accepted" branch to run.
is_long_enough = True
is_high_quality = False

if is_long_enough and is_high_quality:
    print("Accepted")
else:
    print("Rejected")
Fail
Rejected
# Summarize a list of floats with the built-in min, max, sum and len.
expression_levels = [2.1, 3.4, 1.8, 6.2, 4.0]

min_value = min(expression_levels)
max_value = max(expression_levels)
# Arithmetic mean: total divided by the number of items.
mean_value = sum(expression_levels) / len(expression_levels)

print(f"min: {min_value}")
print(f"max: {max_value}")
print(f"mean: {mean_value}")

# Index 0 is the first element; type() reports the class of that element.
first_item = expression_levels[0]
print(f"type of first item: {type(first_item)}")
min: 1.8
max: 6.2
mean: 3.5
type of first item: <class 'float'>
# Truthiness: an empty list is falsy, so it can be tested directly in an `if`
# without comparing its length to zero.
sequences = []

if sequences:
    print("Sequences loaded!")
else:
    print("No sequences found!")

# bool() shows the truth value of an object: empty strings, zero and empty
# lists are False; their non-empty / non-zero counterparts are True.
print(bool(""))
print(bool("AGTC"))
print(bool(0))
print(bool(3.14))
print(bool([]))
print(bool(["AGTC"]))
No sequences found!
False
True
False
True
False
True
# Deliberate demonstration of why built-in names should not be reused:
# after this assignment the name `str` refers to the string "ACTG",
# not to the built-in string type.
str = "ACTG"
print(len(str))

# Note: Don't worry about the try/except for now.
# There will be a whole chapter about it later!
try:
    # `str` is now a string object, so calling it raises TypeError
    # and `pi` is never assigned.
    pi = str(3.14)
except TypeError as error:
    print(f"There was an error! {error=}")
4
There was an error! error=TypeError("'str' object is not callable")
# Evaluate three independent quality criteria for a read, store each result
# in a named boolean, then combine them in one readable condition.
read_length = 120
gc_content = 0.55
quality_score = 32

read_length_good = read_length >= 100
# Chained comparison: equivalent to (0.4 <= gc_content) and (gc_content <= 0.6).
gc_content_good = 0.4 <= gc_content <= 0.6
# Note the strict inequality: a score of exactly 30 does not pass here.
quality_score_good = quality_score > 30

if read_length_good and gc_content_good and quality_score_good:
    print("Read passes all quality filters")
else:
    print("Read filtered out")
Read passes all quality filters
# Report a gene's p-value in scientific notation and classify it against
# a ladder of thresholds.
gene_id = "nrdA"
p_value = 0.000012345

# ":.2e" formats the number in scientific notation with two decimals.
print(f"Gene {gene_id} => {p_value:.2e}")

# Branches are tested top to bottom and only the first match runs, so the
# thresholds must be ordered from strictest to loosest.
if p_value < 0.01:
    print("Highly significant")
elif p_value < 0.05:
    print("Significant")
elif p_value < 0.10:
    print("Almost significant")
else:
    print("Not significant")
Gene nrdA => 1.23e-05
Highly significant
# Compute basic statistics for a DNA string: length, per-nucleotide counts,
# GC percentage and a weighted-sum molecular weight.

# Normalize to upper case first so the counts below match regardless of the
# input's letter case.
dna_sequence = "TGacTGatcGT".upper()

sequence_length = len(dna_sequence)

a_count = dna_sequence.count("A")
c_count = dna_sequence.count("C")
g_count = dna_sequence.count("G")
t_count = dna_sequence.count("T")

# "N" is counted separately as the ambiguous-base symbol.
ambiguous_count = dna_sequence.count("N")

# GC content expressed as a percentage of the full sequence length.
gc_content = (g_count + c_count) / sequence_length * 100

# Each nucleotide count is multiplied by its own per-base weight, and a
# constant 79.0 is added once for the whole sequence.
molecular_weight = (
    a_count * 313.2 + t_count * 304.2 + c_count * 289.2 + g_count * 329.2 + 79.0
)

print(f"DNA Sequence: {dna_sequence}")
print(f"Length: {sequence_length}")
print("Nucleotide counts")
print(f"A: {a_count}, C: {c_count}, G: {g_count}, T: {t_count}")
print(f"Ambiguous count: {ambiguous_count}")
print(f"GC Content (%): {gc_content:.1f}")
print(f"Molecular weight: {molecular_weight:.1f}")
DNA Sequence: TGACTGATCGT
Length: 11
Nucleotide counts
A: 2, C: 2, G: 3, T: 4
Ambiguous count: 0
GC Content (%): 45.5
Molecular weight: 3488.2
# Filter a sequence with nested conditionals: each check is only reached if
# the previous one passed, and every failure reports its own reason.
sequence_length = 250
quality_score = 32
ambiguous_bases = 1

if sequence_length >= 200:
    if quality_score >= 30:
        if ambiguous_bases <= 1:
            print("Sequence accepted")
        else:
            print("Sequence rejected: too many ambiguous bases")
    else:
        print("Sequence rejected: low quality")
else:
    print("Sequence rejected: too short")
Sequence accepted
Here is one way you might solve the optional extension for this problem:
# Flat version of the sequence filter: each rejection condition is tested in
# turn with if/elif, and the sequence is accepted only if none of them match.
sequence_length = 250
quality_score = 32
ambiguous_bases = 1

if sequence_length < 200:
    print("Sequence rejected: too short")
elif quality_score < 30:
    print("Sequence rejected: low quality")
elif ambiguous_bases > 1:
    print("Sequence rejected: too many ambiguous bases")
else:
    print("Sequence accepted")
Sequence accepted