# Set-up: create the two fixture files used by the practice problems below.
with open("sample.txt", "w") as file:
    file.write("first line\nsecond line\nthird line\n")

data_lines = [
    "1. Basics\n",
    "2. Collections\n",
    "3. Algorithms\n",
    "4. Functions\n",
    "5. OOP\n",
    "6. Errors\n",
    "7. EDA\n",
    "8. Stats\n",
    "9. I/O\n",
]

# writelines() does not add newlines, so each entry carries its own "\n".
with open("data.txt", "w") as file:
    file.writelines(data_lines)
Appendix H — Practice Problem Solutions — Chapter 9
Set Up
Set up some fake data that we will use in the practice problems.
Group 1
- Open a file called
data.txt
for reading, print its type, then close it.
file = open("data.txt", "r")
print(type(file))
file.close()
<class '_io.TextIOWrapper'>
- Write “Hello, World!” into a file named
test.txt
.
# "w" mode creates test.txt (or truncates it if it already exists).
with open("test.txt", "w") as f:
    f.write("Hello, World!")
- Read and print all text from a file named
sample.txt
.
# read() with no argument returns the whole file as a single string.
with open("sample.txt") as f:
    content = f.read()
    print(content)
first line
second line
third line
- Read a file line by line and print each line without the trailing newline character(s).
with open("sample.txt") as file:
for line in file:
print(line.strip())
first line
second line
third line
- Append the text “New Entry” to
log.txt
.
with open("log.txt", "a") as file:
file.write("New Entry\n")
- Print the file’s name and mode after opening it.
with open("sample.txt") as file:
print(file.name)
print(file.mode)
sample.txt
r
- Write three lines to
multi.txt
: “One”, “Two”, “Three”, each on its own line.
with open("multi.txt", "w") as file:
file.write("One\n")
file.write("Two\n")
file.write("Three\n")
- Use a for-loop to write the numbers 1-5 to a file (one per line).
with open("numbers.txt", "w") as file:
for i in range(1, 6):
file.write(str(i) + "\n")
- Print `"File is closed"` if the file is closed after exiting a `with`-block.
with open("sample.txt") as file:
pass
if file.closed:
print("File is closed")
File is closed
- Use
readline()
to read and print just the first line ofsample.txt
.
with open("sample.txt") as file:
print(file.readline().strip())
first line
- Create a function that prints the contents of a file it is given.
def print_file(file_path):
    """Print the entire contents of the file at ``file_path``.

    The file is opened in text mode for reading and is closed automatically
    when the ``with`` block ends. ``open`` raises ``OSError`` (for example
    ``FileNotFoundError``) if the file cannot be opened.
    """
    with open(file_path) as file:
        print(file.read())
- Use a
for
loop to write a list of fruits into a file, one fruit per line.
fruits = ["apple", "banana", "cherry"]

# write() adds no newline of its own, so append one per fruit.
with open("fruits.txt", "w") as file:
    for fruit in fruits:
        file.write(fruit + "\n")
- Read and print the first eight characters of
sample.txt
.
with open("sample.txt", "rb") as file:
print(file.read(8))
b'first li'
- Demonstrate that opening an existing file in write mode (
"w"
) mode erases its contents.
# First write: the file now holds "contents".
with open("test.txt", "w") as file:
    file.write("contents")

# Re-opening in "w" mode truncates the file before anything is written.
with open("test.txt", "w") as file:
    file.write("Overwritten!")

with open("test.txt") as file:
    contents = file.read()

# Only the second write survives.
assert contents == "Overwritten!"
- Use a try-except block to print a message if
not_a_file.txt
does not exist.
# open() raises FileNotFoundError when the path does not exist.
try:
    with open("not_a_file.txt") as file:
        data = file.read()
except FileNotFoundError as error:
    print(f"{error=}")
error=FileNotFoundError(2, 'No such file or directory')
- Print file position (using
.tell()
) before and after reading 4 bytes.
with open("sample.txt", "rb") as file:
print(file.tell())
file.read(4)
print(file.tell())
0
4
- Write the binary bytes `b'ABCDEFGHIJK'` to a file called `bytes.bin`.
with open("bytes.bin", "wb") as file:
file.write(b"ABCDEFGHIJK")
- Read the binary file you just created (
bytes.bin
) and print the first five bytes.
with open("bytes.bin", "rb") as f:
print(f.read(5))
b'ABCDE'
- Use
"rt"
mode to read text and"wb"
mode to write bytes.
with open("sample.txt", "rt") as file:
print(file.read())
with open("bytes.bin", "wb") as file:
file.write(b"xyz")
first line
second line
third line
- Print the error message if a file open operation raises an
OSError
.
# FileNotFoundError is a subclass of OSError, so this handler catches it too.
try:
    with open("/fake/file.txt") as file:
        content = file.read()
except OSError as error:
    print(f"{error=}")
error=FileNotFoundError(2, 'No such file or directory')
- Print the first line from a file, then use
.seek(0)
to go back to the beginning of the file and re-print the first line.
with open("sample.txt") as file:
print(file.readline().strip())
file.seek(0)
print(file.readline().strip())
first line
first line
- Use
with
statement to write the line"Finished!"
intofinished.txt
.
with open("finished.txt", "w") as file:
file.write("Finished!\n")
- Open the file
finished.txt
and append the line"Appending again!"
.
with open("finished.txt", "a") as file:
file.write("Appending again!\n")
- Create a dictionary, and write each key-value pair to a file (format:
key => value
).
d = {"A": 1, "B": 2}

# One "key => value" line per dictionary entry.
with open("dict.txt", "w") as file:
    for k, v in d.items():
        file.write(f"{k} => {v}\n")
- Print the current working directory using `os.getcwd()`.
import os
print(os.getcwd())
- List files in the current directory with
os.listdir()
.
import os
print(os.listdir("."))
- Pass a file name to
os.listdir()
, then handle the error usingtry/except
.
import os

# Deliberately pass a regular file where a directory is expected.
dirname = "sample.txt"

try:
    os.listdir(dirname)
except NotADirectoryError:
    print(f"'{dirname}' is not a directory!")
'sample.txt' is not a directory!
- After writing three lines to a file called
sample.txt
, read the file and print the number of lines. (Usewritelines()
andreadlines()
.)
lines = ["first line\n", "second line\n", "third line\n"]

with open("sample.txt", "w") as file:
    file.writelines(lines)

# readlines() returns one list element per line in the file.
with open("sample.txt") as file:
    print(len(file.readlines()))
3
- Use
seek
to skip the first 3 bytes then print the rest of the file.
with open("sample.txt", "rb") as file:
file.seek(3)
print(file.read())
b'st line\nsecond line\nthird line\n'
- Catch any
OSError
when trying to open a file.
# Catching OSError covers missing files and other failures raised by open().
try:
    with open("maybe_missing.txt") as file:
        data = file.read()
except OSError as error:
    print("Caught OSError:", error)
Caught OSError: [Errno 2] No such file or directory: 'maybe_missing.txt'
Group 2
- Read all lines from
data.txt
into a list, then write every second line toeven_lines.txt
.
with open("data.txt") as file:
    lines = file.readlines()

# enumerate() counts from 0, so odd indexes are the 2nd, 4th, ... lines.
with open("even_lines.txt", "w") as file:
    for i, line in enumerate(lines):
        if i % 2 == 1:
            file.write(line)

# Check your work!
with open("even_lines.txt") as file:
    for line in file:
        print(line.strip())
2. Collections
4. Functions
6. Errors
8. Stats
- Write user input (entered with
input()
) to a file calleduser.txt
.
# input() returns the typed line without its trailing newline.
text = input("Enter something: ")

with open("user.txt", "w") as f:
    f.write(text)
- Open
data.txt
for writing and write 10 lines ("Line {i}"
). Then, open the same file again and append a summary line:"Total lines: 10"
.
with open("data.txt", "w") as file:
for i in range(10):
file.write(f"Line {i + 1}\n")
with open("data.txt", "a") as file:
file.write("Total lines: 10\n")
# Check your work!
with open("data.txt") as file:
print(file.read().strip())
Line 1
Line 2
Line 3
Line 4
Line 5
Line 6
Line 7
Line 8
Line 9
Line 10
Total lines: 10
- Write each character of a string to a new line in a text file.
message = "coding is cool"

# Iterating over a string yields one character at a time (spaces included).
with open("chars.txt", "w") as file:
    for letter in message:
        file.write(f"{letter}\n")

# Check your work!
with open("chars.txt") as file:
    print(file.read().strip())
c
o
d
i
n
g
i
s
c
o
o
l
- Ask for a filename. Try to read and print it, or print “Not found!” if the file does not exist.
filename = input("Filename: ")

# Try to open first and handle the failure, rather than checking beforehand.
try:
    with open(filename) as f:
        print(f.read())
except FileNotFoundError:
    print(f"{filename} was not found!")
TODO: make a note about the stdin stuff needing the {} around python so that you can try it for yourself.
- Write an integer list to a text file, then read it and compute their sum.
numbers = [1, 2, 3, 4]

with open("numbers.txt", "w") as file:
    for number in numbers:
        file.write(str(number) + "\n")

# Each line comes back as text, so convert it to int before summing.
with open("numbers.txt") as file:
    total = sum(int(line.strip()) for line in file)

print(total)
10
Read up to the 10th character of a file and print those characters backwards.
# read(10) returns at most the first 10 characters of the file.
with open('sample.txt') as f:
    text = f.read(10)

# A slice with step -1 walks the string backwards.
print(text[::-1])
Write a file, then read its contents twice using
seek()
.
with open("temp.txt", "w") as file:
    file.write("Magic Beans\n")

with open("temp.txt") as file:
    contents = file.read()
    print(contents.strip())

    # The first read() left the position at end-of-file; rewind to read again.
    file.seek(0)
    contents = file.read()
    print(contents.strip())
Magic Beans
Magic Beans
TODO: decide on a couple of file names and just use those
- Write four words to a file, each on its own line. Then, print all the lines of that file in uppercase.
words = ["apple", "pie", "is", "good"]

with open("numbers.txt", "w") as file:
    for word in words:
        file.write(f"{word}\n")

# strip() drops the trailing newline before upper-casing each line.
with open("numbers.txt") as file:
    for line in file:
        print(line.strip().upper())
APPLE
PIE
IS
GOOD
- Write some lines to a file, including some empty lines. Then, read the file back, counting the number of empty lines.
with open("sample.txt", "w") as file:
    for line in ["this", "", "is", "a", "", "line"]:
        file.write(f"{line}\n")

# Each comparison is a bool; sum() counts the True values as 1.
with open("sample.txt") as file:
    blank_line_count = sum(line.strip() == "" for line in file)

print(f"there were {blank_line_count} empty lines!")
there were 2 empty lines!
- Write two lists (
genes
andcounts
) into a file asgene,count
rows.
genes = ["nrdA", "nrdJ"]
counts = [10, 20]

# zip() pairs the two lists element by element.
with open("pairs.csv", "w") as file:
    for gene, count in zip(genes, counts):
        file.write(f"{gene},{count}\n")

# Check your work!
with open("pairs.csv") as file:
    print(file.read().strip())
nrdA,10
nrdJ,20
- Write some lines to a file, some of which contain the word
"gene"
. Then, open that file and print every line that contains the word"gene"
.
with open("data.txt", "w") as file:
file.writelines(
["gene therapy\n",
"protein sequences\n",
"gene annotation\n",
"analyzing gene expression\n",
"multiple sequence alignment\n",
]
)
with open("data.txt") as file:
for line in file:
if "gene" in line:
print(line.strip())
gene therapy
gene annotation
analyzing gene expression
- Read the contents from one file and write it uppercased to another file. (Read the input file line-by-line.)
with open("data.txt") as input_file, open("upper.txt", "w") as output_file:
for line in input_file:
output_file.write(line.upper())
# Check your work!
with open("upper.txt") as file:
for line in file:
print(line.strip())
GENE THERAPY
PROTEIN SEQUENCES
GENE ANNOTATION
ANALYZING GENE EXPRESSION
MULTIPLE SEQUENCE ALIGNMENT
- Try to open a file that doesn’t exist without crashing the program.
# The result is assigned to "_" because only the open attempt matters here.
try:
    with open('/fake/file.txt') as file:
        _ = file.read()
except OSError as error:
    print(f"{error=}")
error=FileNotFoundError(2, 'No such file or directory')
- Create a list of dictionaries like this:
{"A": 1, "B": 2, "C": 3}
. Then write the data as a CSV file with a header line.
rows = [
    {"A": 1, "B": 4, "C": 7},
    {"A": 2, "B": 5, "C": 8},
    {"A": 3, "B": 6, "C": 9},
]

with open("table.csv", "w") as file:
    # Header line first; its order matches the key order of every row dict.
    file.write("A,B,C\n")

    for row in rows:
        # str.join() needs strings, so convert each value first.
        values = [str(value) for value in row.values()]
        line = ",".join(values)
        file.write(f"{line}\n")

# Check your work!
with open("table.csv") as file:
    for line in file:
        print(line.strip())
A,B,C
1,4,7
2,5,8
3,6,9
- Create a small FASTA file. Then, read the file and count how many lines in a file start with “>”.
# Build a three-record FASTA file: a ">" header line, then a sequence line.
with open("sequences.fasta", "w") as file:
    file.write(">seq_1\n")
    file.write("ACTG\n")
    file.write(">seq_2\n")
    file.write("GGCAC\n")
    file.write(">seq_3\n")
    file.write("AAACTA\n")

# Count the header lines, i.e. the lines that start with ">".
with open("sequences.fasta") as file:
    record_count = sum(line.startswith(">") for line in file)

print(record_count)
3
- Copy the header lines from the FASTA file you just created into another file. Do not print the
>
in the output file.
# A single with-statement keeps the input and output files open together.
with open("sequences.fasta") as fasta_file, open("headers.txt", "w") as output_file:
    for line in fasta_file:
        if line.startswith(">"):
            # [1:] drops the leading ">"; strip() removed the newline, so re-add it.
            output_line = line.strip()[1:] + "\n"
            output_file.write(output_line)

# Check your work!
with open("headers.txt") as file:
    for line in file:
        print(line.strip())
seq_1
seq_2
seq_3
- Write a few lines to a file. One of the lines should be
"exit"
. Then, read the lines of the file you created, but stop as soon as you read the"exit"
line.
with open("data.txt", "w") as file:
    file.writelines(
        [
            "line 1\n",
            "line 2\n",
            "exit\n",
            "line 3\n",
        ]
    )

with open("data.txt") as file:
    for line in file:
        line = line.strip()

        # Stop at the sentinel line; nothing after it is read or printed.
        if line == "exit":
            break

        print(line)
line 1
line 2
- Open an output file, write one line, then print the output of
file.closed
. Next, usewith
to open the file, and after the block, print the result offile.closed
again.
file = open("output.txt", "w")
file.write("gene 1\n")
print(file.closed)
file.close()
with open("output.txt", "w") as file:
file.write("gene 2\n")
print(file.closed)
False
True
- Write three numbers to a binary file as bytes, then read, and print them as integers.
numbers = [7, 8, 9]

# bytes() turns a list of ints in range(256) into one byte per int.
with open("numbers.dat", "wb") as file:
    file.write(bytes(numbers))

with open("numbers.dat", "rb") as file:
    data = file.read()

print(type(data))
print(data)
# Iterating over a bytes object yields ints, so list() recovers the numbers.
print(list(data))
<class 'bytes'>
b'\x07\x08\t'
[7, 8, 9]
Group 3
- Using biopython, write code that opens a FASTA file and (1) prints the sequence ID and length for each sequence, and (2) prints the mean sequence length. (Use the FASTA sequence you created earlier.)
from Bio import SeqIO

sequence_count = 0
total_length = 0

# SeqIO.parse() yields one record per FASTA entry.
for record in SeqIO.parse("sequences.fasta", "fasta"):
    sequence_count += 1
    seq_length = len(record.seq)
    total_length += seq_length

    print(record.id, seq_length, sep="\t")

print("\nTotal sequences:", sequence_count)
print("\nMean length:", total_length / sequence_count)
seq_1 4
seq_2 5
seq_3 6
Total sequences: 3
Mean length: 5.0
- Write the contents of a dictionary to a TSV file. Each line should be like
key\tvalue
. Then read the file, insert any lines where the value is greater than or equal to 10 into a new dictionary.
data = {"a": 1, "b": 2, "c": 3, "d": 10, "e": 20, "f": 30}

with open("dict.tsv", "w") as file:
    for key, value in data.items():
        line = f"{key}\t{value}\n"
        file.write(line)

filtered_data = {}

with open("dict.tsv") as file:
    for line in file:
        key, value = line.strip().split("\t")

        # Values come back from the file as text: compare as int, store as read.
        if int(value) >= 10:
            filtered_data[key] = value

print(filtered_data)
{'d': '10', 'e': '20', 'f': '30'}
- Using pandas, create a data frame with the following data:
{"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}
, and write it to a CSV without the row index. Read the resulting file usingcsv.DictReader
. Print any record in which the value in field “A” is >= 2 and the value in field “C” is <= 8.
import csv
import pandas as pd

df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
# index=False leaves the row index out of the CSV file.
df.to_csv("df.csv", index=False)

# The csv module docs recommend newline="" when opening files for csv readers.
with open("df.csv", newline="") as file:
    for record in csv.DictReader(file):
        # DictReader yields every field as a string, so convert before comparing.
        if int(record["A"]) >= 2 and int(record["C"]) <= 8:
            print(record)
{'A': '2', 'B': '5', 'C': '8'}
- Write code that opens a FASTQ file, then prints the id and average quality score for the first 10 records.
from Bio import SeqIO
import numpy as np

# TODO: get the data in the right location
for i, record in enumerate(SeqIO.parse("../../_data/sample_1.fastq", "fastq")):
    # Only the first 10 records are wanted; stop once the index reaches 10.
    if i >= 10:
        break

    quality_score = np.mean(record.letter_annotations["phred_quality"])
    print(record.id, quality_score, sep=" => ")
read_0 => 23.757142857142856
read_1 => 24.114285714285714
read_2 => 22.32857142857143
read_3 => 23.357142857142858
read_4 => 22.15714285714286
read_5 => 25.071428571428573
read_6 => 25.87142857142857
read_7 => 22.185714285714287
read_8 => 23.87142857142857
read_9 => 24.257142857142856
- Read a binary file and print each byte in hexadecimal. (Use the built-in hex() function.)
with open("data.bin", "wb") as file:
    file.write(b"apple pie")

with open("data.bin", "rb") as file:
    data = file.read()

# Iterating over bytes yields ints, which hex() formats as "0x..".
for byte in data:
    print(hex(byte))
0x61
0x70
0x70
0x6c
0x65
0x20
0x70
0x69
0x65
- Try to read and print the contents of a list of files. If any file doesn’t exist, skip it and print a message about the file not being found.
filenames = ["fake.txt", "data.txt", "nope.txt"]

for filename in filenames:
    print()

    # The try sits inside the loop so one missing file does not stop the rest.
    try:
        with open(filename) as file:
            print(f"found {filename}!")
            print(file.read().strip())
            print("DONE!")
    except FileNotFoundError:
        print(f"file '{filename}' not found")
file 'fake.txt' not found
found data.txt!
line 1
line 2
exit
line 3
DONE!
file 'nope.txt' not found
- Write the given
gene_data
to a file. Then, read the lines of the file, extracting gene names and sequences from each line using regular expressions. Finally, print each gene name and sequence in the format “name => sequence”.
import re

gene_data = [
    "gene: nrdA; seq: AACCTTG\n",
    "gene: nrdJd; seq: ACACGGT\n",
    "gene: pol; seq: AAACGGTAA\n",
]

with open("gene_data.txt", "w") as file:
    file.writelines(gene_data)

# Group 1 captures the gene name, group 2 the nucleotide sequence.
pattern = r"gene: ([a-zA-Z]+); seq: ([ACTG]+)"

with open("gene_data.txt") as file:
    for line in file:
        matches = re.fullmatch(pattern, line.strip())

        # fullmatch() returns None when a line does not fit the pattern;
        # skip such lines instead of failing when indexing the result.
        if matches is None:
            continue

        gene_name = matches[1]
        sequence = matches[2]
        print(gene_name, sequence, sep=" => ")
nrdA => AACCTTG
nrdJd => ACACGGT
pol => AAACGGTAA
- Create a file containing 50 random words chosen from the following list
["apple", "pie", "is", "good"]
. Read that file and count how many times each word occurs. Print the dictionary sorted by word count. Don’t forget to set the random seed for reproducibility!
from collections import Counter
import random

# A fixed seed makes the "random" word choices reproducible.
random.seed(2341)

with open("words.txt", "w") as file:
    for word in random.choices(["apple", "pie", "is", "good"], k=50):
        file.write(word + "\n")

with open("words.txt") as f:
    words = f.read().split()

counts = Counter(words)

# The exercise asks for output sorted by word count: most_common() yields
# (word, count) pairs ordered from the highest count to the lowest.
for word, count in counts.most_common():
    print(word, count)

# Output:
# good 16
# apple 13
# is 12
# pie 9
- Without using the CSV module, read a CSV file. If any of the lines have a different number of fields, stop the iteration and print an error message.
# Append a deliberately malformed row (four fields) to trigger the check below.
with open("df.csv", "a") as file:
    file.write("1,2,3,4\n")

with open("df.csv") as file:
    # The first line sets how many fields every later line must have.
    fields = file.readline().strip().split(",")
    expected_length = len(fields)

    for line in file:
        line = line.strip()
        fields = line.split(",")

        if len(fields) != expected_length:
            print(
                "ERROR",
                f"line '{line}'",
                f"expected: {expected_length} fields",
                f"found: {len(fields)} fields",
                sep=" -- ",
            )
            # Stop at the first bad line.
            break
ERROR -- line '1,2,3,4' -- expected: 3 fields -- found: 4 fields
- Given a file path, open the file either as text or binary based on its extension (
.txt
– text mode,.bin
– binary mode), and print the contents. Make sure to handle file not found errors!
path = "file.txt"

# ".bin" files are read as raw bytes; everything else is read as text.
mode = "rb" if path.endswith(".bin") else "r"

try:
    with open(path, mode) as file:
        print(file.read())
except FileNotFoundError:
    print(f"file '{path}' not found!")
file 'file.txt' not found!