Appendix H — Practice Problem Solutions — Chapter 9

Author

Ryan M. Moore, PhD

Published

April 24, 2025

Modified

April 28, 2025

Set Up

Set up some fake data that we will use in the practice problems.

# Three-line text fixture that the practice problems read from.
with open("sample.txt", "w") as sample_out:
    sample_out.writelines(["first line\n", "second line\n", "third line\n"])

# One numbered line per topic. Each entry carries its own newline because
# writelines() does not add line separators.
topics = [
    "Basics",
    "Collections",
    "Algorithms",
    "Functions",
    "OOP",
    "Errors",
    "EDA",
    "Stats",
    "I/O",
]
data_lines = [f"{number}. {topic}\n" for number, topic in enumerate(topics, start=1)]

with open("data.txt", "w") as data_out:
    data_out.writelines(data_lines)

Group 1

  1. Open a file called data.txt for reading, print its type, then close it.
# Opened without a with-block on purpose so the matching close() is visible.
handle = open("data.txt", mode="r")
print(type(handle))
handle.close()
<class '_io.TextIOWrapper'>
  1. Write “Hello, World!” into a file named test.txt.
# "w" creates test.txt, or empties it first if it already exists.
with open("test.txt", mode="w") as out_file:
    out_file.write("Hello, World!")
  1. Read and print all text from a file named sample.txt.
# read() with no argument returns the whole file as a single string.
with open("sample.txt") as in_file:
    content = in_file.read()

print(content)
first line
second line
third line
  1. Read a file line by line and print each line without the trailing newline character(s).
# Iterating over the file object yields one line at a time.
with open("sample.txt") as in_file:
    for raw_line in in_file:
        # strip() drops the trailing newline (and any surrounding whitespace).
        print(raw_line.strip())
first line
second line
third line
  1. Append the text “New Entry” to log.txt.
# "a" adds to the end of log.txt instead of replacing what is already there.
with open("log.txt", mode="a") as log_file:
    log_file.write("New Entry\n")
  1. Print the file’s name and mode after opening it.
# The file object remembers the path it was opened with and its mode.
with open("sample.txt") as in_file:
    print(in_file.name, in_file.mode, sep="\n")
sample.txt
r
  1. Write three lines to multi.txt: “One”, “Two”, “Three”, each on its own line.
# Each string ends with its own "\n"; writelines() adds no separators.
with open("multi.txt", "w") as out_file:
    out_file.writelines(["One\n", "Two\n", "Three\n"])
  1. Use a for-loop to write the numbers 1-5 to a file (one per line).
with open("numbers.txt", "w") as out_file:
    # range(1, 6) stops before 6, so this writes 1 through 5.
    for number in range(1, 6):
        out_file.write(f"{number}\n")
  1. Print "File is closed" if file is closed after exiting a with-block.
with open("sample.txt") as handle:
    pass  # nothing to read; only the exit from the block matters here

# The name is still bound after the with-block, but the file has been closed.
if handle.closed:
    print("File is closed")
File is closed
  1. Use readline() to read and print just the first line of sample.txt.
# readline() returns a single line, including its trailing newline.
with open("sample.txt") as in_file:
    first_line = in_file.readline()

print(first_line.strip())
first line
  1. Create a function that prints the contents of a file it is given.
def print_file(file_path):
    """Print the entire contents of the file at ``file_path``."""
    with open(file_path) as handle:
        contents = handle.read()
    print(contents)
  1. Use a for loop to write a list of fruits into a file, one fruit per line.
fruits = ["apple", "banana", "cherry"]

with open("fruits.txt", "w") as out_file:
    for name in fruits:
        # Add the newline ourselves so every fruit lands on its own line.
        out_file.write(f"{name}\n")
  1. Read and print the first eight characters of sample.txt.
# Binary mode: read(8) returns the first eight *bytes*. sample.txt holds only
# ASCII text, so those bytes correspond to its first eight characters.
with open("sample.txt", "rb") as in_file:
    first_eight = in_file.read(8)

print(first_eight)
b'first li'
  1. Demonstrate that opening an existing file in write mode ("w") erases its contents.
# First write: give the file some initial contents.
with open("test.txt", "w") as first_pass:
    first_pass.write("contents")

# Second write: opening with "w" again discards what was written above.
with open("test.txt", "w") as second_pass:
    second_pass.write("Overwritten!")

with open("test.txt") as check:
    contents = check.read()

# Only the second string should remain in the file.
assert contents == "Overwritten!"
  1. Use a try-except block to print a message if not_a_file.txt does not exist.
try:
    with open("not_a_file.txt") as missing_file:
        data = missing_file.read()
except FileNotFoundError as error:
    # The "=" specifier prints the expression text followed by its repr.
    print(f"{error=}")
error=FileNotFoundError(2, 'No such file or directory')
  1. Print file position (using .tell()) before and after reading 4 bytes.
with open("sample.txt", "rb") as in_file:
    # Position right after opening.
    print(in_file.tell())
    # Consume four bytes; the data itself is not needed.
    in_file.read(4)
    # Position after the read.
    print(in_file.tell())
0
4
  1. Write binary bytes b'ABCDEFGHIJK' to a file called bytes.bin.
# "wb" opens the file for binary writing, so write() takes a bytes object.
with open("bytes.bin", mode="wb") as bin_out:
    bin_out.write(b"ABCDEFGHIJK")
  1. Read the binary file you just created (bytes.bin) and print the first five bytes.
# read(5) on a binary file returns at most five bytes.
with open("bytes.bin", "rb") as bin_in:
    first_five = bin_in.read(5)

print(first_five)
b'ABCDE'
  1. Use "rt" mode to read text and "wb" mode to write bytes.
# "rt": read as text.
with open("sample.txt", "rt") as text_in:
    text = text_in.read()
print(text)

# "wb": write raw bytes.
with open("bytes.bin", "wb") as bytes_out:
    bytes_out.write(b"xyz")
first line
second line
third line
  1. Print the error message if a file open operation raises an OSError.
try:
    with open("/fake/file.txt") as fake_file:
        content = fake_file.read()
except OSError as error:
    # Catching OSError also covers its subclasses, e.g. FileNotFoundError.
    print(f"{error=}")
error=FileNotFoundError(2, 'No such file or directory')
  1. Print the first line from a file, then use .seek(0) to go back to the beginning of the file and re-print the first line.
with open("sample.txt") as in_file:
    first_pass = in_file.readline()
    print(first_pass.strip())

    # Rewind to the start so the same line can be read again.
    in_file.seek(0)

    second_pass = in_file.readline()
    print(second_pass.strip())
first line
first line
  1. Use a with statement to write the line "Finished!" into finished.txt.
# The with-block closes finished.txt as soon as the write is done.
with open("finished.txt", mode="w") as out_file:
    out_file.write("Finished!\n")
  1. Open the file finished.txt and append the line "Appending again!".
# "a" keeps the existing contents and writes after them.
with open("finished.txt", mode="a") as out_file:
    out_file.write("Appending again!\n")
  1. Create a dictionary, and write each key-value pair to a file (format: key => value).
d = {"A": 1, "B": 2}

with open("dict.txt", "w") as out_file:
    # items() yields (key, value) pairs; write one "key => value" line for each.
    for key, value in d.items():
        out_file.write(f"{key} => {value}\n")
  1. Print the current working directory using the os.getcwd() function.
import os

# getcwd() returns the current working directory as a string.
current_dir = os.getcwd()
print(current_dir)
  1. List files in the current directory with os.listdir().
import os

# "." refers to the current directory.
entries = os.listdir(".")
print(entries)
  1. Pass a file name to os.listdir(), then handle the error using try/except.
import os

# Deliberately a regular file rather than a directory.
not_a_dir = "sample.txt"

try:
    os.listdir(not_a_dir)
except NotADirectoryError:
    print(f"'{not_a_dir}' is not a directory!")
'sample.txt' is not a directory!
  1. After writing three lines to a file called sample.txt, read the file and print the number of lines. (Use writelines() and readlines().)
lines = ["first line\n", "second line\n", "third line\n"]

with open("sample.txt", "w") as out_file:
    out_file.writelines(lines)

# readlines() returns a list with one string per line in the file.
with open("sample.txt") as in_file:
    line_count = len(in_file.readlines())

print(line_count)
3
  1. Use seek to skip the first 3 bytes then print the rest of the file.
with open("sample.txt", "rb") as in_file:
    # Jump past the first three bytes, then read everything that is left.
    in_file.seek(3)
    remainder = in_file.read()

print(remainder)
b'st line\nsecond line\nthird line\n'
  1. Catch any OSError when trying to open a file.
try:
    with open("maybe_missing.txt") as maybe_file:
        data = maybe_file.read()
except OSError as exc:
    # Printing the exception shows its human-readable message.
    print("Caught OSError:", exc)
Caught OSError: [Errno 2] No such file or directory: 'maybe_missing.txt'

Group 2

  1. Read all lines from data.txt into a list, then write every second line to even_lines.txt.
with open("data.txt") as source:
    lines = source.readlines()

# Indexes 1, 3, 5, ... hold the 2nd, 4th, 6th, ... lines of the file.
with open("even_lines.txt", "w") as target:
    target.writelines(lines[1::2])

# Check your work!
with open("even_lines.txt") as check:
    for written in check:
        print(written.strip())
2. Collections
4. Functions
6. Errors
8. Stats
  1. Write user input (entered with input()) to a file called user.txt.
# input() returns whatever the user typed, without the trailing newline.
text = input("Enter something: ")

with open("user.txt", "w") as out_file:
    out_file.write(text)
  1. Open data.txt for writing and write 10 lines ("Line {i}"). Then, open the same file again and append a summary line: "Total lines: 10".
# Numbering starts at 1, so count from 1 through 10 directly.
with open("data.txt", "w") as out_file:
    for line_number in range(1, 11):
        out_file.write(f"Line {line_number}\n")

# Reopen in append mode so the ten lines above are kept.
with open("data.txt", "a") as out_file:
    out_file.write("Total lines: 10\n")

# Check your work!
with open("data.txt") as check:
    print(check.read().strip())
Line 1
Line 2
Line 3
Line 4
Line 5
Line 6
Line 7
Line 8
Line 9
Line 10
Total lines: 10
  1. Write each character of a string to a new line in a text file.
message = "coding is cool"

# Iterating over a string yields one character at a time (spaces included).
with open("chars.txt", "w") as out_file:
    out_file.writelines(f"{letter}\n" for letter in message)

# Check your work!
with open("chars.txt") as check:
    print(check.read().strip())
c
o
d
i
n
g
 
i
s
 
c
o
o
l
  1. Ask for a filename. Try to read and print it, or print “Not found!” if the file does not exist.
filename = input("Filename: ")

try:
    with open(filename) as in_file:
        print(in_file.read())
except FileNotFoundError:
    # Raised by open() when no file exists at the given path.
    print(f"{filename} was not found!")

TODO: make a note about the stdin stuff needing to put the {} around python to try it for yourself.

  1. Write a list of integers to a text file, then read the file back and compute the sum of the numbers.
numbers = [1, 2, 3, 4]

with open("numbers.txt", "w") as out_file:
    for number in numbers:
        out_file.write(f"{number}\n")

# Every line holds one integer as text; convert each one back before adding.
total = 0
with open("numbers.txt") as in_file:
    for line in in_file:
        total += int(line.strip())

print(total)
10
  1. Read up to the 10th character of a file and print those characters backwards.

    # In text mode, read(10) returns at most the first ten characters.
    with open('sample.txt') as in_file:
        first_ten = in_file.read(10)
    # A slice with step -1 walks the string backwards.
    print(first_ten[::-1])
  2. Write a file, then read its contents twice using seek().

with open("temp.txt", "w") as out_file:
    out_file.write("Magic Beans\n")

with open("temp.txt") as in_file:
    # First read: consumes the whole file and leaves the position at the end.
    contents = in_file.read()
    print(contents.strip())

    # Rewind; without this the second read() would return an empty string.
    in_file.seek(0)

    # Second read: same contents again.
    contents = in_file.read()
    print(contents.strip())
Magic Beans
Magic Beans

TODO: decide on a couple of file names and just use those

  1. Write a few words to a file, each on its own line. Then, print all the lines of that file in uppercase.
words = ["apple", "pie", "is", "good"]

with open("numbers.txt", "w") as out_file:
    out_file.writelines(f"{word}\n" for word in words)

with open("numbers.txt") as in_file:
    for line in in_file:
        # Remove the newline first, then upper-case what is left.
        stripped = line.strip()
        print(stripped.upper())
APPLE
PIE
IS
GOOD
  1. Write some lines to a file, including some empty lines. Then, read the file back, counting the number of empty lines.
with open("sample.txt", "w") as out_file:
    for text in ["this", "", "is", "a", "", "line"]:
        out_file.write(f"{text}\n")

# A line counts as blank when nothing is left after stripping whitespace.
blank_line_count = 0
with open("sample.txt") as in_file:
    for line in in_file:
        if not line.strip():
            blank_line_count += 1

print(f"there were {blank_line_count} empty lines!")
there were 2 empty lines!
  1. Write two lists (genes and counts) into a file as gene,count rows.
genes = ["nrdA", "nrdJ"]
counts = [10, 20]

# zip() pairs the lists up element by element: (genes[0], counts[0]), ...
csv_rows = [f"{gene},{count}\n" for gene, count in zip(genes, counts)]

with open("pairs.csv", "w") as out_file:
    out_file.writelines(csv_rows)

# Check your work!
with open("pairs.csv") as check:
    print(check.read().strip())
nrdA,10
nrdJ,20
  1. Write some lines to a file, some of which contain the word "gene". Then, open that file and print every line that contains the word "gene".
entries = [
    "gene therapy\n",
    "protein sequences\n",
    "gene annotation\n",
    "analyzing gene expression\n",
    "multiple sequence alignment\n",
]

with open("data.txt", "w") as out_file:
    out_file.writelines(entries)

with open("data.txt") as in_file:
    for line in in_file:
        # Substring test: matches "gene" anywhere in the line.
        if "gene" not in line:
            continue
        print(line.strip())
gene therapy
gene annotation
analyzing gene expression
  1. Read the contents from one file and write it uppercased to another file. (Read the input file line-by-line.)
# Both files are managed by one with-statement; the generator pulls lines
# from the input one at a time and hands the upper-cased text to the output.
with open("data.txt") as input_file, open("upper.txt", "w") as output_file:
    output_file.writelines(line.upper() for line in input_file)

# Check your work!
with open("upper.txt") as check:
    for line in check:
        print(line.strip())
GENE THERAPY
PROTEIN SEQUENCES
GENE ANNOTATION
ANALYZING GENE EXPRESSION
MULTIPLE SEQUENCE ALIGNMENT
  1. Try to open a file that doesn’t exist without crashing the program.
try:
    with open('/fake/file.txt') as fake_file:
        # The contents are not needed; only the attempt to open matters.
        _ = fake_file.read()
except OSError as error:
    # The "=" specifier prints the expression text followed by its repr.
    print(f"{error=}")
error=FileNotFoundError(2, 'No such file or directory')
  1. Create a list of dictionaries like this: {"A": 1, "B": 2, "C": 3}. Then write the data as a CSV file with a header line.
rows = [
    {"A": 1, "B": 4, "C": 7},
    {"A": 2, "B": 5, "C": 8},
    {"A": 3, "B": 6, "C": 9},
]

with open("table.csv", "w") as out_file:
    # Header line first, then one comma-separated line per dictionary.
    out_file.write("A,B,C\n")
    for row in rows:
        # join() only accepts strings, so convert every value first.
        out_file.write(",".join(map(str, row.values())) + "\n")

# Check your work!
with open("table.csv") as check:
    for line in check:
        print(line.strip())
A,B,C
1,4,7
2,5,8
3,6,9
  1. Create a small FASTA file. Then, read the file and count how many lines start with “>”.
# Three records; each one is a ">" header line followed by a sequence line.
fasta_lines = [
    ">seq_1\n",
    "ACTG\n",
    ">seq_2\n",
    "GGCAC\n",
    ">seq_3\n",
    "AAACTA\n",
]

with open("sequences.fasta", "w") as out_file:
    out_file.writelines(fasta_lines)


# Count the lines that begin with ">".
record_count = 0
with open("sequences.fasta") as in_file:
    for line in in_file:
        if line.startswith(">"):
            record_count += 1

print(record_count)
3
  1. Copy the header lines from the FASTA file you just created into another file. Do not print the > in the output file.
with open("sequences.fasta") as fasta_file, open("headers.txt", "w") as output_file:
    for line in fasta_file:
        # Only header lines are copied; sequence lines are skipped.
        if not line.startswith(">"):
            continue
        # [1:] drops the leading ">" from the stripped header.
        header = line.strip()[1:]
        output_file.write(header + "\n")

# Check your work!
with open("headers.txt") as check:
    for line in check:
        print(line.strip())
seq_1
seq_2
seq_3
  1. Write a few lines to a file. One of the lines should be "exit". Then, read the lines of the file you created, but stop as soon as you read the "exit" line.
script_lines = [
    "line 1\n",
    "line 2\n",
    "exit\n",
    "line 3\n",
]

with open("data.txt", "w") as out_file:
    out_file.writelines(script_lines)

with open("data.txt") as in_file:
    for raw_line in in_file:
        text = raw_line.strip()
        # Stop at the sentinel; nothing after it is read or printed.
        if text == "exit":
            break

        print(text)
line 1
line 2
  1. Open an output file, write one line, then print the output of file.closed. Next, use with to open the file, and after the block, print the result of file.closed again.
# Manual open: the file stays open until close() is called explicitly.
out_file = open("output.txt", "w")
out_file.write("gene 1\n")
print(out_file.closed)
out_file.close()

# With-block: the file is closed automatically when the block ends.
with open("output.txt", "w") as out_file:
    out_file.write("gene 2\n")

print(out_file.closed)
False
True
  1. Write three numbers to a binary file as bytes, then read, and print them as integers.
numbers = [7, 8, 9]

# bytes() turns a list of small integers into one byte per number.
with open("numbers.dat", "wb") as bin_out:
    bin_out.write(bytes(numbers))

with open("numbers.dat", "rb") as bin_in:
    data = bin_in.read()

print(type(data))
print(data)

# Iterating over a bytes object yields integers again.
print(list(data))
<class 'bytes'>
b'\x07\x08\t'
[7, 8, 9]

Group 3

  1. Using biopython, write code that opens a FASTA file and (1) prints the sequence ID and length for each sequence, and (2) prints the mean sequence length. (Use the FASTA sequence you created earlier.)
from Bio import SeqIO

# Running tallies, used to compute the mean after the loop.
n_records = 0
length_sum = 0

for rec in SeqIO.parse("sequences.fasta", "fasta"):
    rec_length = len(rec.seq)
    n_records += 1
    length_sum += rec_length

    print(rec.id, rec_length, sep="\t")

print("\nTotal sequences:", n_records)
print("\nMean length:", length_sum / n_records)
seq_1   4
seq_2   5
seq_3   6

Total sequences: 3

Mean length: 5.0
  1. Write the contents of a dictionary to a TSV file. Each line should be like key\tvalue. Then read the file, insert any lines where the value is greater than or equal to 10 into a new dictionary.
data = {"a": 1, "b": 2, "c": 3, "d": 10, "e": 20, "f": 30}

# One "key<TAB>value" line per dictionary entry.
with open("dict.tsv", "w") as out_file:
    out_file.writelines(f"{key}\t{value}\n" for key, value in data.items())

filtered_data = {}
with open("dict.tsv") as in_file:
    for row in in_file:
        key, value = row.strip().split("\t")
        # Values come back from the file as text; compare them as integers.
        if int(value) < 10:
            continue
        # The value is stored as read from the file, i.e. as a string.
        filtered_data[key] = value

print(filtered_data)
{'d': '10', 'e': '20', 'f': '30'}
  1. Using pandas, create a data frame with the following data: {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}, and write it to a CSV without the row index. Read the resulting file using csv.DictReader. Print any record in which the value in field “A” is >= 2 and the value in field “C” is <= 8.
import csv
import pandas as pd

df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
# index=False keeps the row index out of the CSV file.
df.to_csv("df.csv", index=False)

with open("df.csv", newline="") as csv_file:
    reader = csv.DictReader(csv_file)
    for record in reader:
        # DictReader yields every field as a string, so convert to compare.
        if int(record["A"]) < 2:
            continue
        if int(record["C"]) > 8:
            continue
        print(record)
{'A': '2', 'B': '5', 'C': '8'}
  1. Write code that opens a FASTQ file, then prints the id and average quality score for the first 10 records.
from Bio import SeqIO
import numpy as np

# TODO: get the data in the right location
# enumerate() supplies the running index used to stop after ten records.
for i, record in enumerate(SeqIO.parse("../../_data/sample_1.fastq", "fastq")):
    if i >= 10:
        break

    # Mean of this record's "phred_quality" letter annotations.
    quality_score = np.mean(record.letter_annotations["phred_quality"])
    print(record.id, quality_score, sep=" => ")
read_0 => 23.757142857142856
read_1 => 24.114285714285714
read_2 => 22.32857142857143
read_3 => 23.357142857142858
read_4 => 22.15714285714286
read_5 => 25.071428571428573
read_6 => 25.87142857142857
read_7 => 22.185714285714287
read_8 => 23.87142857142857
read_9 => 24.257142857142856
  1. Read a binary file and print each byte in hexadecimal. (Use the built-in hex() function.)
with open("data.bin", "wb") as bin_out:
    bin_out.write(b"apple pie")

with open("data.bin", "rb") as bin_in:
    data = bin_in.read()

# Iterating over bytes yields integers, which hex() formats as "0x..".
for byte_value in data:
    print(hex(byte_value))
0x61
0x70
0x70
0x6c
0x65
0x20
0x70
0x69
0x65
  1. Try to read and print the contents of a list of files. If any file doesn’t exist, skip it and print a message about the file not being found.
filenames = ["fake.txt", "data.txt", "nope.txt"]

for name in filenames:
    # Blank line between the reports for each file.
    print()
    try:
        with open(name) as handle:
            print(f"found {name}!")
            print(handle.read().strip())
            print("DONE!")
    except FileNotFoundError:
        # A missing file is reported and the loop moves on to the next name.
        print(f"file '{name}' not found")

file 'fake.txt' not found

found data.txt!
line 1
line 2
exit
line 3
DONE!

file 'nope.txt' not found
  1. Write the given gene_data to a file. Then, read the lines of the file, extracting gene names and sequences from each line using regular expressions. Finally, print each gene name and sequence in the format “name => sequence”.
import re

gene_data = [
    "gene: nrdA; seq: AACCTTG\n",
    "gene: nrdJd; seq: ACACGGT\n",
    "gene: pol; seq: AAACGGTAA\n",
]

with open("gene_data.txt", "w") as file:
    file.writelines(gene_data)

# Group 1 captures the gene name (letters only); group 2 captures the
# sequence (only the characters A, C, T and G).
pattern = r"gene: ([a-zA-Z]+); seq: ([ACTG]+)"


def parse_gene_line(line):
    """Return ``(gene_name, sequence)`` for a well-formed line, else ``None``.

    Surrounding whitespace (including the trailing newline) is stripped
    before the whole line is matched against ``pattern``.
    """
    match = re.fullmatch(pattern, line.strip())
    if match is None:
        return None
    return match[1], match[2]


with open("gene_data.txt") as file:
    for line in file:
        parsed = parse_gene_line(line)
        if parsed is None:
            # re.fullmatch() returns None when the line does not match, and
            # indexing None would raise a TypeError, so report and move on.
            print(f"skipping malformed line: {line.strip()!r}")
            continue

        gene_name, sequence = parsed
        print(gene_name, sequence, sep=" => ")
nrdA => AACCTTG
nrdJd => ACACGGT
pol => AAACGGTAA
  1. Create a file containing 50 random words chosen from the following list ["apple", "pie", "is", "good"]. Read that file and count how many times each word occurs. Print the dictionary sorted by word count. Don’t forget to set the random seed for reproducibility!
from collections import Counter
import random

# Fixed seed so the same 50 words are drawn on every run.
random.seed(2341)

with open("words.txt", "w") as file:
    for word in random.choices(["apple", "pie", "is", "good"], k=50):
        file.write(word + "\n")

# split() with no arguments splits on any whitespace, newlines included.
with open("words.txt") as f:
    words = f.read().split()

counts = Counter(words)

# most_common() returns (word, count) pairs ordered from the highest count to
# the lowest, which is the "sorted by word count" order the problem asks for.
# (Sorting the Counter itself would order the words alphabetically instead.)
ranked = counts.most_common()

for word, count in ranked:
    print(word, count)
apple 13
good 16
is 12
pie 9
  1. Without using the CSV module, read a CSV file. If any of the lines have a different number of fields, stop the iteration and print an error message.
# Append a row with one field too many so there is something to detect.
with open("df.csv", "a") as file:
    file.write("1,2,3,4\n")

with open("df.csv") as file:
    # The first line of the file sets the expected number of fields.
    header_fields = file.readline().strip().split(",")
    expected_length = len(header_fields)

    for line in file:
        line = line.strip()
        found_length = len(line.split(","))

        if found_length == expected_length:
            continue

        # First mismatch: report it and stop reading.
        print(
            "ERROR",
            f"line '{line}'",
            f"expected: {expected_length} fields",
            f"found: {found_length} fields",
            sep=" -- "
        )
        break
ERROR -- line '1,2,3,4' -- expected: 3 fields -- found: 4 fields
  1. Given a file path, open the file either as text or binary based on its extension (.txt – text mode, .bin – binary mode), and print the contents. Make sure to handle file not found errors!
path = "file.txt"

# Only paths ending in ".bin" are read as raw bytes; every other path is
# opened in text mode.
mode = "rb" if path.endswith(".bin") else "r"

try:
    with open(path, mode) as file:
        print(file.read())
except FileNotFoundError:
    # The exception object is not used, so it is not bound to a name.
    print(f"file '{path}' not found!")
file 'file.txt' not found!