Can someone please help me with my code for Week 6 in CS50. It is not working
# TODO: Check for command-line usage
if len(sys.argv) != 3:
sys.exit("Usage: python dna.py data.csv sequence.txt")
# TODO: Read database file into a variable
database = []
with open(sys.argv[1], "r") as file:
reader = csv.DictReader(file)
for name in reader:
name["AGATC"] = int(name["AGATC"])
name["TTTTTTCT"] = int(name["TTTTTTCT"])
name["AATG"] = int(name["AATG"])
name["TCTAG"] = int(name["TCTAG"])
name["TATC"] = int(name["TATC"])
name["GATA"] = int(name["GATA"])
name["GAAA"] = int(name["GAAA"])
name["TCTG"] = int(name["TCTG"])
database.append(name)
# TODO: Read DNA sequence file into a variable
dna = []
with open(sys.argv[2], "r") as file:
dna = file.read()
# TODO: Find longest match of each STR in DNA sequence
AGATC = int(longest_match(dna, "AGATC"))
TTTTTTCT = int(longest_match(dna, "TTTTTTCT"))
AATG = int(longest_match(dna, "AATG"))
TCTAG = int(longest_match(dna, "TCTAG"))
TATC = int(longest_match(dna, "TATC"))
GATA = int(longest_match(dna, "GATA"))
GAAA = int(longest_match(dna, "GAAA"))
TCTG = int(longest_match(dna, "TCTG"))
# TODO: Check database for matching profiles
for row in database:
if TCTG and TTTTTTCT and AATG and TCTAG and TATC and GATA and GAAA and AGATC in row:
print(row["name"])
return
else:
continue
# Initialize variables
longest_run = 0
subsequence_length = len(subsequence)
sequence_length = len(sequence)
# Check each character in sequence for most consecutive runs of subsequence
for i in range(sequence_length):
# Initialize count of consecutive runs
count = 0
# Check for a subsequence match in a "substring" (a subset of characters) within sequence
# If a match, move substring to next potential match in sequence
# Continue moving substring and checking for matches until out of consecutive matches
while True:
# Adjust substring start and end
start = i + count * subsequence_length
end = start + subsequence_length
# If there is a match in the substring
if sequence[start:end] == subsequence:
count += 1
# If there is no match in the substring
else:
break
# Update most consecutive matches found
longest_run = max(longest_run, count)
# After checking for runs at each character in seqeuence, return longest run found
return longest_run