def Option():
    print("""OPTIONS:\n
            [R]: Prompts the user to load a file (in this project S.typhimurium fasta file)\n
            [S]:  It gets the compliment of a dna\n
            [W]:  gets the revervse complement of a dna\n
            [I]:  prints rest of ORF in a DNA sequence\n
            [A]: gets the rest of ORF in one frame of a sequence\n
            [K]  gets the rest of ORF in all frames sequences\n
            [J]  gets the rest of ORF in one frame in both strands\n
            [M]  gets the longest ORF\n
            [N]  Computes the maximum length of the longest ORF over num_trials shuffles of the specfied DNA sequence\n
            [B]  Computes the Protein encoded by a sequence of DNA\n
            [F]   Returns the amino acid sequences that are likely coded by the specified dna
            [H]:  Give the use of all the Options\n
            [Q]:  To exit the program\n""")
Option()
OPTIONS:

            [R]: Prompts the user to load a file (in this project S.typhimurium fasta file)

            [S]:  It gets the compliment of a dna

            [W]:  gets the revervse complement of a dna

            [I]:  prints rest of ORF in a DNA sequence

            [A]: gets the rest of ORF in one frame of a sequence

            [K]  gets the rest of ORF in all frames sequences

            [J]  gets the rest of ORF in one frame in both strands

            [M]  gets the longest ORF

            [N]  Computes the maximum length of the longest ORF over num_trials shuffles of the specfied DNA sequence

            [B]  Computes the Protein encoded by a sequence of DNA

            [F]   Returns the amino acid sequences that are likely coded by the specified dna
            [H]:  Give the use of all the Options

            [Q]:  To exit the program

def H():
    Option()
H()
OPTIONS:

            [R]: Prompts the user to load a file (in this project S.typhimurium fasta file)

            [S]:  It gets the compliment of a dna

            [W]:  gets the revervse complement of a dna

            [I]:  prints rest of ORF in a DNA sequence

            [A]: gets the rest of ORF in one frame of a sequence

            [K]  gets the rest of ORF in all frames sequences

            [J]  gets the rest of ORF in one frame in both strands

            [M]  gets the longest ORF

            [N]  Computes the maximum length of the longest ORF over num_trials shuffles of the specfied DNA sequence

            [B]  Computes the Protein encoded by a sequence of DNA

            [F]   Returns the amino acid sequences that are likely coded by the specified dna
            [H]:  Give the use of all the Options

            [Q]:  To exit the program
#provide a nucleotide sequnce
nucleotide ='ATGGCTAGCGATGCGAGCCCTACCGTGACCGATCCATGAGAGATGCTCTAGGCTATGAATGACGTAGCG'
def get_complement(nucleotide):
    """ Returns the complementary nucleotide

        nucleotide: a nucleotide (A, C, G, or T) represented as a string
        returns: the complementary nucleotide
    >>> get_complement('A')
    'T'
    >>> get_complement('C')
    'G'
    """
    complement = nucleotide.replace("A","t")   #V#replace with small letters and the make uppercase to prevent confusion
    complement = complement.replace("T","a")
    complement = complement.replace("C","g")
    complement = complement.replace("G","c")
    complement = complement.upper()  #return to upper case
    return complement
get_complement(nucleotide)
'TACCGATCGCTACGCTCGGGATGGCACTGGCTAGGTACTCTCTACGAGATCCGATACTTACTGCATCGC'
get_reverse_complement(load_seq('/home/eanbit2/mscbioinfo/introtoprog/python/mini-project-eunicenjuguna/data/X73525.fa'))
'GGATCCCATGAACGTCTTTTAGCGTGGCGCCTTCACTAACGGCATCCCAAATCATTTCCGCAACACGTTCTTCGCTGACATTATTTTGATAATCCATTACTTACTCCTGTTATCTGTCACCGACTTTGTAGAACTTAACGACTGCGTTTATCTGATGCAGTTATTAAACCCCGACGGTGGTTAGTGAACATTCAAAAAACGCCCAATGAATACATCGCTACTGCCTTACGCGGCTCAATGCCGTACCTCGTTTTCTTGTGGCTGAATAACGTCTTTGCCCGCGTTTTCTACCTCTTCCAGCCAAACCAGAAGACGTAAAACTTCATCAATTTCTTCCAGACTCACCAGATCATAACGGCGATGGGTTTTGAAAAGACTGCGCGCCAGTTTGATATCGACGATCACAGGTACGCCAACCTTCTCCGCATAGGCGCGGACGGCCAGTGCGCGCTGATTCGTTTCATACACCGAGATCATCGGAATCGGCATCAATTCGGGTTTAAAATAAATCCCGATCGTAATATGCGTGGGGTTGGCAACAATCAGGCGTGAGTTTTCAATATCAGATTTCACCTGTTCAGACAGAATTTCCATATGAACTTCACGTCTTTTAGATTTAACCTCTGGGTTCCCTTCCTGCTCCTTCATTTCACGCTTCACTTCTTCCTTATCCATTTTCATATCTTTCATGGTCAGGAAATATTCCGCAATAGCATCCAATAATAAGACAATCAATGCGCAAGCAAGGCAAGTTAATACCAATGCGAGGAGAAGTTCACGCCAAATGACGGCAATACCTACAATATTGCCATTTAGCTGAGAAAAGATTTCAACCTTATATTTCTTCCAGCAAATGATGGCGGCCACCACAAAGGATGAGAGATACAGTAGGGTTTTGACCGTATCTTTAACCGTGCGCATACTAAAAAGTTTTTTTGCCCCTTCTACCGGGTTTAACGCCGATAAATTAGGCTTTAATGCTTCTGTCGCCAGCACAAAACCGGCCTGTAATAACGCCGGTAATGCGGAACACACTAAGCAGAGCAGCATAAATGGAATCAGATATTTTAACCCTATCCCAAAAACGGCCAAACTGTAGTCAGCCATGCTCTGATCAAAATTATCCGCAATAATGATCTTAATTATCCCCATAAACTCATTAAATGAGCCATACGACACCAGATAGGCAATTCCTCCCAGCGTCAGGCAGGCGATAATGAGATCTTTACTTTTAAATGACTGGCCTTTTTTAGCGGAGTCTTCCAGCCGTTTTTTAGTCGGTTTTTCTGTTTTATTCGAGGACATGCGTCGCCCCTCGCTCGTAAAACCAACTGCTTAACCCTGTGGCCTGGAAAGAGAGTCGCAGTACATTGTCCGGTAGTACCGGAGAGAAATAAAGCAGCATAATTAAAACGGCAATACCGCTTTTTACCGTCAGTGAAATCGCAAAAGCGTTCATTTGCGGAGCAAAGCGCGACAATAAACCCAGGAATACTTCTGACAGCAACAGCACTAATACCACCGGACTGGCCAGAACCAAGGCGTTTTGAGCCACCTGATTAATAAACGTTAATAGCGGCGGTAATGAAGGCGTGCACTCGTTCATCGGATCGCATAGCTGATAGCTTTTATTTAACACGTCAACCATCGTGACCAGACCGCCGTTTTGTAAATAAACGACAGCGGCAAACATATTCAGGAAATTAGCCATTTCCGAGGTATCAATACCGTTTGCCGGATCGATACTACTACTTAGCGTTGCCCCTCGCTGGTTATCGATAATACAACCCAGCGCATGCATAACCCAAAAAGGCCATGACAGCAGACAGCCCAGCATGACGCCTACCGCCGCTTCTTGCAGAACTAACGGGATCATCGCCACCGATAAAAACGGCGGCGCCTCGTTCAATGCATGCGGCCATACTCCCAATGCCACCAGGATGATAATGGCGTTTCTCGGCGCACCGCTTAATACCCCGCTATTCAAAAACGGCAGGAAGAAAAAAATCGGCGCCACGCGAGCAAACCCTAGCGCCGCAGACGCAACCAGGTGATGAATTTCAAAGTACAACGCGTAAAACATTTTTTACCCCTTAGCCAACGCCAGGAATATCACCTGACGCCCGTAAGAGAGTAAAACTTCGCCATACCAGCCAGACAGTAAAAACAAGCATAAACACACGCCAAGTAATTTAATGCCAAAAGGCAGCGTCTGTTCCTGTAATTGCGTTACCGTCTGGAATAACCCTACCAGGAGGCCGATAATCGTTGCGACAATCGTCGGCCACCCTGACAGGATCAAAACAAGATAGAGCGCCTTATTACCTGCAAACACTAAATCATCCATTTAACTATCCCGTCTCGTAATGATGTCATGTTGCAATGTCCATATACTGTAATATCAATCCCTTAGACAGTAAGGTCCAGCCATCAAGCGCGACAAAAAGCACCAGCTTAATAGGTGTAGATATCGTCACCGGACTCATCATCATCATCCCCAGCGCCAGTAGCACGCTGGATACCACCAGGTCGACGACGACAAAGGGCAAATAAAGATAAAAACCAATTTTAAACGCGCTTTTTATTTCGCTCAGCGCATAAGCAGGTAATAACGCAAATATTGAAGGTTTTTCAATTTCATCTTTGTCACGCTTTACCGTCTCGGTCTCTTCTCCATACTGACGCTTCAGTTGCGCGTTTTCAAAAAACTGAACTAACTCGCGATCTGAATATTTGATCAGATAATCGCGATAACCATCCAGACCTTCATCAACGTGTTTACTTAATGATGAAATATCATTAAAGGTGACATCTTCGTCCTCAAAATAGACGTAGGCATCATGCATTATGGGCCACATAACAAACATAGAAAGCAGCAATGCGACGCCGTTAAGCGTCATATTTGAAGGTATCTGCTGTAATCCCAGGGCGTTACGCACCATGACAAATACAATAGAAAATTTAACGAAACAGGTTCCTGACGCAATAATAAATGGCAACAGGGTGGAAAATGCCAGTAAGGCAATTAATGAGATATCATTCCCCATTACCAGACTCGCTCAGCCATTCATGGATCTCAACGCCTAAGGTGTCATTCATCTGTACCAGTTCGCCATTACCCAGCAAAACACCATTCGCCATAATTTCAACGTTAAGTTCAGCATTGGTCGGCAGTGATAATAGCTGTTGCTGCCCCATGGCTTCGAGTTCGGCGAGGGTAACGTTCTTACGATACAAAACAAATTCCAGTTTGACGGGCAATTGATTCAAGCCAGGCAGAGTTTCTGCAGTTTCAGTTGTATTATTTTCTTCTTCGATATGTTGAATATCTAACGTTTCCACAATAATTCCCCCTTCAACACGGTTGAAATGACCTAACTTTTTCGCGTAGCAATAAACTTCCGCACGGGAAGTACGAATCAGGAGTACATCTCCGATCCCGATTCGGCCCAGCAACGAACGCTGCGTATCACTGCTACCGATTACAAAGCGCAACGGCCAACGCAGCATTTTCGGCCTGCCGCCCCCGACTGCAGGCAGTTCAGGAAGATGCTCAAACCACAGGCCGCCCCGATCGCTCATAATGTGCAACAATTTCCCTTCCGGCAGCGCGCTTCCCGGTACGGGGTTCTCTACGCATAAACGCCGACAGGACAAATGCGGCACGGGCAACTCAAACGGTCGCTCTGTTGCAGCAAGCCAGGGAACGACCAGGTGCTCAGCGCCAGCAGAAACCGCCGCCCCAGCCAGAGCGGGAGAGACATGCTCAAGCCAGTCCCCAGGTTTAATCCAGGCCGACCACCGTTTTTCTGCATCGCTCAACCGAACCCACATTCCCTGTCGCGTCGGATATTCCAGCGTCGCTTCCCGGCCATGGCGCTGGCATTCTGTCGCGGTTTGCGCCAATAGCCATTCGCGACGATCAATCTGTCTCACACGCAATGACATCAGGCGTCATCCTCCTCGCCAGATTGCTGTCTGTGCTGTTGCTGCTGCGGATTTTGTTGATCGTCTCGCGTCAGGTGCCAGCGCTGGGGATTACCGTTTTGCCATTGATCATGCAAACGATGTTCAACCTGCGTATTTGACGGTATTAACGAAAACTCCCCTGCTTGCCGCGCCTGAATATTGACGGAATAGTCATTTCCCCAGCGCTGAAAACGGTAAGTCAGCGAGCTATCCTCTCCTTTCACGCCATCGGCAGTGGGAAAAATAGTCATCATCGGCTTTGATTGCGCCGCTAAAGGCATTTTTTCATCGCCGCCGGTTAATTGGCTAAGATCGGCGATAGTGGTTGGTTGCAGCGGAAGCTGAGAAACATCTTTAACCTTTTTATGATCTTTATCTTCAGGCTTACCGGTATTGGCTGCGGCCATTCGGGCAGGTGCGACATCCCGCGCCAGCGGCGCGCCCTCTTTACGAACGCCTTCGCCCGCGATGGCTTTATTATCCCCAGGCAATGCCTTGATATTATCGTCAGAGATTCCCGTGGCGTTATCGGTTACTTCACTAACGGATTCCACAGCTTTTAAATCAGCAGATAATTTTTTACCGCTTACGCTTTCTAACGGCCTATTTTTAGACGATAGCAACGCTGCGGATTTATCTACTTTGGCCTCCGCAGAGATCAAACCGACAGATTTTTCAGCAGTGACTTTCAACAGTTTTTCAGCAATCCTGAGTTCGCTTTTTCCGTTATGATGCAGACCAGAAACGTTGCCATTGTGATGTTCTGATTTCGCTGGCGCGCCATGTCGCCATGCCGCCAGTAATAACGGTAAAGCCGTTTCTTTATGCATTACGAAAGCATCGCCATAGTCGCGATCTTTTTTATCACCGGAATATTCTGTCTTATGTTTTTCCACCGCTTTTTTTAATGCTTCTGATAAACCGCCAACCTCATCCTGCTGCGGCAGTAAAATGTTCCCGGATGAACTGACAGCTGACACATCGCCCATTAAATTATCTCCTCTGACTCGGCCTCTTCCTGCTGTATCTCTCGCTGGATATAGAATCTTTTCTGACGGATTATCCAGCGTTGATAGTTCCCTTCTTTGCGCAACCAATATTTACTTTTTTTCTGAAACTCTTCCCTTTTCTTTTCCAGCTCGCTCCGTTTTTCCTGAATTTGTATAATCTGGAGTTCTAAATCTTTTATCTGCCGGCGAACAATAGACTGCTTACGTAATAACGTATAAATTTCCTCACGACTGAGCTGTCTGTTTTCTGCACGCAGCGTATCTAATAACAATTTCAGACCCGCTATTTGTTCAAGGATCGCCTCCTCCTCGGCCTGCAGCCCGCGGTCCTCATCCTGATAGCGAAGTAATATCGACTCACACTGTGAATGAAATACCGTACAGCGCCGCTGCAATACTTTAATTCTGGTCAGCGAATGCATTCATACCGCTCAACGTGTCATCAAAGGATGAATACTGCGCTACCGGCTGGCATAACCAGGCTTTCAGGCTATCCCGCATCTGCATCGCCCGATCGTTATCGATATTTTCGCCAGGACGATATTCTCCCAAGTCAATGAAAAGCTGGAGCTCTTCCAAACGCGTCATTAATTTACGCACGGCAGATGCCTGTTCAGCATGTGTCGGCGTCGTGACTTGTCCAAAAACGCGGCTTACGCTTTTCAGTACATCGATTGCCGGGTAATGTCCCTGCCCGGCCAGCTTTCTGCTCAGATACAGGTGACCGTCAAGGATAGAGCGAATTTCATCCGCCATCGGGTCCGCCTCTTCCTCGCTTTCCAGCAGTACCGTATAAAAGGCAGTAATGCTTCCCTCGCTGGTCGCCCCTGGGCGTTCCAGCAAGCGGGGCAAATTATCGAATACGGAGGCGGGATAACCTCGACGAGCCGGACGCTCTCCCGACGCCAGTGCCACGTCTCGCAAAGCACGCGCATAACGGGTCATGGAATCGATAAAAAGCACGACCCGTTTTCCCTGGTCGCGAAAATATTCCGCTACGGTTGTCGCCAGTTGCGCCGCATTGCAGCGATCGACCGAGGGGAAATCGGAAGTGGCAAAAACCAGCACGCATTTTTCTTTCTTATGCGAAGCGCGCAACATATCCACGAATTCAGTGACCTCACGGCCTCGTTCACCGATAAGACCGATAACAAAGACATCCGCCTCCGTTTGCTCGATCAGCATATGCATCAGCATGGTCTTACCGCATCCTGCGGAGGCAAAAATGCCCATTCGCTGGCCTACGCCACAGGTCAATAACCCGTCAATCGCGCGCACACCGGTAATCAGCGGTTCACGGACGCCAACGCGTGAAGCGTAAGACGGCGGTGCGACATCAATAACGCGTTCTTCGCTAATCGGCGCCACTTCAGGGGTAAAACGCTCAACGATTTTCCCTGTCGGATCC'
def load_seq(fasta_file):
    """ Reads a FASTA file and returns the DNA sequence as a string.

    fasta_file: the path to the FASTA file containing the DNA sequence
    returns: the DNA sequence as a string
    """
    retval = ""
    f = open(fasta_file) # assign f to the opened fata file
    lines = f.readlines()   #assin lines to the lists created
    for l in lines[1:]:  #remove the ist line in fa(header)
        retval += l[0:-1] #append the lista without header to the empty list
    f.close()
    return retval
load_seq('/home/eanbit2/mscbioinfo/introtoprog/python/mini-project-eunicenjuguna/data/X73525.fa')
'GGATCCGACAGGGAAAATCGTTGAGCGTTTTACCCCTGAAGTGGCGCCGATTAGCGAAGAACGCGTTATTGATGTCGCACCGCCGTCTTACGCTTCACGCGTTGGCGTCCGTGAACCGCTGATTACCGGTGTGCGCGCGATTGACGGGTTATTGACCTGTGGCGTAGGCCAGCGAATGGGCATTTTTGCCTCCGCAGGATGCGGTAAGACCATGCTGATGCATATGCTGATCGAGCAAACGGAGGCGGATGTCTTTGTTATCGGTCTTATCGGTGAACGAGGCCGTGAGGTCACTGAATTCGTGGATATGTTGCGCGCTTCGCATAAGAAAGAAAAATGCGTGCTGGTTTTTGCCACTTCCGATTTCCCCTCGGTCGATCGCTGCAATGCGGCGCAACTGGCGACAACCGTAGCGGAATATTTTCGCGACCAGGGAAAACGGGTCGTGCTTTTTATCGATTCCATGACCCGTTATGCGCGTGCTTTGCGAGACGTGGCACTGGCGTCGGGAGAGCGTCCGGCTCGTCGAGGTTATCCCGCCTCCGTATTCGATAATTTGCCCCGCTTGCTGGAACGCCCAGGGGCGACCAGCGAGGGAAGCATTACTGCCTTTTATACGGTACTGCTGGAAAGCGAGGAAGAGGCGGACCCGATGGCGGATGAAATTCGCTCTATCCTTGACGGTCACCTGTATCTGAGCAGAAAGCTGGCCGGGCAGGGACATTACCCGGCAATCGATGTACTGAAAAGCGTAAGCCGCGTTTTTGGACAAGTCACGACGCCGACACATGCTGAACAGGCATCTGCCGTGCGTAAATTAATGACGCGTTTGGAAGAGCTCCAGCTTTTCATTGACTTGGGAGAATATCGTCCTGGCGAAAATATCGATAACGATCGGGCGATGCAGATGCGGGATAGCCTGAAAGCCTGGTTATGCCAGCCGGTAGCGCAGTATTCATCCTTTGATGACACGTTGAGCGGTATGAATGCATTCGCTGACCAGAATTAAAGTATTGCAGCGGCGCTGTACGGTATTTCATTCACAGTGTGAGTCGATATTACTTCGCTATCAGGATGAGGACCGCGGGCTGCAGGCCGAGGAGGAGGCGATCCTTGAACAAATAGCGGGTCTGAAATTGTTATTAGATACGCTGCGTGCAGAAAACAGACAGCTCAGTCGTGAGGAAATTTATACGTTATTACGTAAGCAGTCTATTGTTCGCCGGCAGATAAAAGATTTAGAACTCCAGATTATACAAATTCAGGAAAAACGGAGCGAGCTGGAAAAGAAAAGGGAAGAGTTTCAGAAAAAAAGTAAATATTGGTTGCGCAAAGAAGGGAACTATCAACGCTGGATAATCCGTCAGAAAAGATTCTATATCCAGCGAGAGATACAGCAGGAAGAGGCCGAGTCAGAGGAGATAATTTAATGGGCGATGTGTCAGCTGTCAGTTCATCCGGGAACATTTTACTGCCGCAGCAGGATGAGGTTGGCGGTTTATCAGAAGCATTAAAAAAAGCGGTGGAAAAACATAAGACAGAATATTCCGGTGATAAAAAAGATCGCGACTATGGCGATGCTTTCGTAATGCATAAAGAAACGGCTTTACCGTTATTACTGGCGGCATGGCGACATGGCGCGCCAGCGAAATCAGAACATCACAATGGCAACGTTTCTGGTCTGCATCATAACGGAAAAAGCGAACTCAGGATTGCTGAAAAACTGTTGAAAGTCACTGCTGAAAAATCTGTCGGTTTGATCTCTGCGGAGGCCAAAGTAGATAAATCCGCAGCGTTGCTATCGTCTAAAAATAGGCCGTTAGAAAGCGTAAGCGGTAAAAAATTATCTGCTGATTTAAAAGCTGTGGAATCCGTTAGTGAAGTAACCGATAACGCCACGGGAATCTCTGACGATAATATCAAGGCATTGCCTGGGGATAATAAAGCCATCGCGGGCGAAGGCGTTCGTAAAGAGGGCGCGCCGCTGGCGCGGGATGTCGCACCTGCCCGAATGGCCGCAGCCAATACCGGTAAGCCTGAAGATAAAGATCATAAAAAGGTTAAAGATGTTTCTCAGCTTCCGCTGCAACCAACCACTATCGCCGATCTTAGCCAATTAACCGGCGGCGATGAAAAAATGCCTTTAGCGGCGCAATCAAAGCCGATGATGACTATTTTTCCCACTGCCGATGGCGTGAAAGGAGAGGATAGCTCGCTGACTTACCGTTTTCAGCGCTGGGGAAATGACTATTCCGTCAATATTCAGGCGCGGCAAGCAGGGGAGTTTTCGTTAATACCGTCAAATACGCAGGTTGAACATCGTTTGCATGATCAATGGCAAAACGGTAATCCCCAGCGCTGGCACCTGACGCGAGACGATCAACAAAATCCGCAGCAGCAACAGCACAGACAGCAATCTGGCGAGGAGGATGACGCCTGATGTCATTGCGTGTGAGACAGATTGATCGTCGCGAATGGCTATTGGCGCAAACCGCGACAGAATGCCAGCGCCATGGCCGGGAAGCGACGCTGGAATATCCGACGCGACAGGGAATGTGGGTTCGGTTGAGCGATGCAGAAAAACGGTGGTCGGCCTGGATTAAACCTGGGGACTGGCTTGAGCATGTCTCTCCCGCTCTGGCTGGGGCGGCGGTTTCTGCTGGCGCTGAGCACCTGGTCGTTCCCTGGCTTGCTGCAACAGAGCGACCGTTTGAGTTGCCCGTGCCGCATTTGTCCTGTCGGCGTTTATGCGTAGAGAACCCCGTACCGGGAAGCGCGCTGCCGGAAGGGAAATTGTTGCACATTATGAGCGATCGGGGCGGCCTGTGGTTTGAGCATCTTCCTGAACTGCCTGCAGTCGGGGGCGGCAGGCCGAAAATGCTGCGTTGGCCGTTGCGCTTTGTAATCGGTAGCAGTGATACGCAGCGTTCGTTGCTGGGCCGAATCGGGATCGGAGATGTACTCCTGATTCGTACTTCCCGTGCGGAAGTTTATTGCTACGCGAAAAAGTTAGGTCATTTCAACCGTGTTGAAGGGGGAATTATTGTGGAAACGTTAGATATTCAACATATCGAAGAAGAAAATAATACAACTGAAACTGCAGAAACTCTGCCTGGCTTGAATCAATTGCCCGTCAAACTGGAATTTGTTTTGTATCGTAAGAACGTTACCCTCGCCGAACTCGAAGCCATGGGGCAGCAACAGCTATTATCACTGCCGACCAATGCTGAACTTAACGTTGAAATTATGGCGAATGGTGTTTTGCTGGGTAATGGCGAACTGGTACAGATGAATGACACCTTAGGCGTTGAGATCCATGAATGGCTGAGCGAGTCTGGTAATGGGGAATGATATCTCATTAATTGCCTTACTGGCATTTTCCACCCTGTTGCCATTTATTATTGCGTCAGGAACCTGTTTCGTTAAATTTTCTATTGTATTTGTCATGGTGCGTAACGCCCTGGGATTACAGCAGATACCTTCAAATATGACGCTTAACGGCGTCGCATTGCTGCTTTCTATGTTTGTTATGTGGCCCATAATGCATGATGCCTACGTCTATTTTGAGGACGAAGATGTCACCTTTAATGATATTTCATCATTAAGTAAACACGTTGATGAAGGTCTGGATGGTTATCGCGATTATCTGATCAAATATTCAGATCGCGAGTTAGTTCAGTTTTTTGAAAACGCGCAACTGAAGCGTCAGTATGGAGAAGAGACCGAGACGGTAAAGCGTGACAAAGATGAAATTGAAAAACCTTCAATATTTGCGTTATTACCTGCTTATGCGCTGAGCGAAATAAAAAGCGCGTTTAAAATTGGTTTTTATCTTTATTTGCCCTTTGTCGTCGTCGACCTGGTGGTATCCAGCGTGCTACTGGCGCTGGGGATGATGATGATGAGTCCGGTGACGATATCTACACCTATTAAGCTGGTGCTTTTTGTCGCGCTTGATGGCTGGACCTTACTGTCTAAGGGATTGATATTACAGTATATGGACATTGCAACATGACATCATTACGAGACGGGATAGTTAAATGGATGATTTAGTGTTTGCAGGTAATAAGGCGCTCTATCTTGTTTTGATCCTGTCAGGGTGGCCGACGATTGTCGCAACGATTATCGGCCTCCTGGTAGGGTTATTCCAGACGGTAACGCAATTACAGGAACAGACGCTGCCTTTTGGCATTAAATTACTTGGCGTGTGTTTATGCTTGTTTTTACTGTCTGGCTGGTATGGCGAAGTTTTACTCTCTTACGGGCGTCAGGTGATATTCCTGGCGTTGGCTAAGGGGTAAAAAATGTTTTACGCGTTGTACTTTGAAATTCATCACCTGGTTGCGTCTGCGGCGCTAGGGTTTGCTCGCGTGGCGCCGATTTTTTTCTTCCTGCCGTTTTTGAATAGCGGGGTATTAAGCGGTGCGCCGAGAAACGCCATTATCATCCTGGTGGCATTGGGAGTATGGCCGCATGCATTGAACGAGGCGCCGCCGTTTTTATCGGTGGCGATGATCCCGTTAGTTCTGCAAGAAGCGGCGGTAGGCGTCATGCTGGGCTGTCTGCTGTCATGGCCTTTTTGGGTTATGCATGCGCTGGGTTGTATTATCGATAACCAGCGAGGGGCAACGCTAAGTAGTAGTATCGATCCGGCAAACGGTATTGATACCTCGGAAATGGCTAATTTCCTGAATATGTTTGCCGCTGTCGTTTATTTACAAAACGGCGGTCTGGTCACGATGGTTGACGTGTTAAATAAAAGCTATCAGCTATGCGATCCGATGAACGAGTGCACGCCTTCATTACCGCCGCTATTAACGTTTATTAATCAGGTGGCTCAAAACGCCTTGGTTCTGGCCAGTCCGGTGGTATTAGTGCTGTTGCTGTCAGAAGTATTCCTGGGTTTATTGTCGCGCTTTGCTCCGCAAATGAACGCTTTTGCGATTTCACTGACGGTAAAAAGCGGTATTGCCGTTTTAATTATGCTGCTTTATTTCTCTCCGGTACTACCGGACAATGTACTGCGACTCTCTTTCCAGGCCACAGGGTTAAGCAGTTGGTTTTACGAGCGAGGGGCGACGCATGTCCTCGAATAAAACAGAAAAACCGACTAAAAAACGGCTGGAAGACTCCGCTAAAAAAGGCCAGTCATTTAAAAGTAAAGATCTCATTATCGCCTGCCTGACGCTGGGAGGAATTGCCTATCTGGTGTCGTATGGCTCATTTAATGAGTTTATGGGGATAATTAAGATCATTATTGCGGATAATTTTGATCAGAGCATGGCTGACTACAGTTTGGCCGTTTTTGGGATAGGGTTAAAATATCTGATTCCATTTATGCTGCTCTGCTTAGTGTGTTCCGCATTACCGGCGTTATTACAGGCCGGTTTTGTGCTGGCGACAGAAGCATTAAAGCCTAATTTATCGGCGTTAAACCCGGTAGAAGGGGCAAAAAAACTTTTTAGTATGCGCACGGTTAAAGATACGGTCAAAACCCTACTGTATCTCTCATCCTTTGTGGTGGCCGCCATCATTTGCTGGAAGAAATATAAGGTTGAAATCTTTTCTCAGCTAAATGGCAATATTGTAGGTATTGCCGTCATTTGGCGTGAACTTCTCCTCGCATTGGTATTAACTTGCCTTGCTTGCGCATTGATTGTCTTATTATTGGATGCTATTGCGGAATATTTCCTGACCATGAAAGATATGAAAATGGATAAGGAAGAAGTGAAGCGTGAAATGAAGGAGCAGGAAGGGAACCCAGAGGTTAAATCTAAAAGACGTGAAGTTCATATGGAAATTCTGTCTGAACAGGTGAAATCTGATATTGAAAACTCACGCCTGATTGTTGCCAACCCCACGCATATTACGATCGGGATTTATTTTAAACCCGAATTGATGCCGATTCCGATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCATTGAGCCGCGTAAGGCAGTAGCGATGTATTCATTGGGCGTTTTTTGAATGTTCACTAACCACCGTCGGGGTTTAATAACTGCATCAGATAAACGCAGTCGTTAAGTTCTACAAAGTCGGTGACAGATAACAGGAGTAAGTAATGGATTATCAAAATAATGTCAGCGAAGAACGTGTTGCGGAAATGATTTGGGATGCCGTTAGTGAAGGCGCCACGCTAAAAGACGTTCATGGGATCC'
pwd
'/home/eanbit2/mscbioinfo/introtoprog/python/mini-project-eunicenjuguna'
get_reverse_complement(load_seq('/home/eanbit2/mscbioinfo/introtoprog/python/mini-project-eunicenjuguna/data/X73525.fa'))
NameError: name 'get_reverse_complement' is not defined
dna='ATGGCTAGCGATGCGAGCCCTACCGTGACCGATCCATGAGAGATGCTCTAGGCTATGAATGACGTAGCG'
def get_reverse_complement(dna):
    complement = dna.replace("A","t")
    complement = complement.replace("T","a")
    complement = complement.replace("C","g")#replace nucleotides with small letters and later make them uppercase
    complement = complement.replace("G","c")
    complement = complement.upper()
    reverse = complement[::-1]#reverse using index
    return reverse
    """ Computes the reverse complementary sequence of DNA for the specfied DNA
        sequence|

        dna: a DNA sequence represented as a string
        returns: the reverse complementary DNA sequence represented as a string
    >>> get_reverse_complement("ATGCCCGCTTT")
    'AAAGCGGGCAT'
    >>> get_reverse_complement("CCGCGTTCA")
    'TGAACGCGG'
   """
#get_reverse_complement(dna)
get_reverse_complement(load_seq('/home/eanbit2/mscbioinfo/introtoprog/python/mini-project-eunicenjuguna/data/X73525.fa'))
'GGATCCCATGAACGTCTTTTAGCGTGGCGCCTTCACTAACGGCATCCCAAATCATTTCCGCAACACGTTCTTCGCTGACATTATTTTGATAATCCATTACTTACTCCTGTTATCTGTCACCGACTTTGTAGAACTTAACGACTGCGTTTATCTGATGCAGTTATTAAACCCCGACGGTGGTTAGTGAACATTCAAAAAACGCCCAATGAATACATCGCTACTGCCTTACGCGGCTCAATGCCGTACCTCGTTTTCTTGTGGCTGAATAACGTCTTTGCCCGCGTTTTCTACCTCTTCCAGCCAAACCAGAAGACGTAAAACTTCATCAATTTCTTCCAGACTCACCAGATCATAACGGCGATGGGTTTTGAAAAGACTGCGCGCCAGTTTGATATCGACGATCACAGGTACGCCAACCTTCTCCGCATAGGCGCGGACGGCCAGTGCGCGCTGATTCGTTTCATACACCGAGATCATCGGAATCGGCATCAATTCGGGTTTAAAATAAATCCCGATCGTAATATGCGTGGGGTTGGCAACAATCAGGCGTGAGTTTTCAATATCAGATTTCACCTGTTCAGACAGAATTTCCATATGAACTTCACGTCTTTTAGATTTAACCTCTGGGTTCCCTTCCTGCTCCTTCATTTCACGCTTCACTTCTTCCTTATCCATTTTCATATCTTTCATGGTCAGGAAATATTCCGCAATAGCATCCAATAATAAGACAATCAATGCGCAAGCAAGGCAAGTTAATACCAATGCGAGGAGAAGTTCACGCCAAATGACGGCAATACCTACAATATTGCCATTTAGCTGAGAAAAGATTTCAACCTTATATTTCTTCCAGCAAATGATGGCGGCCACCACAAAGGATGAGAGATACAGTAGGGTTTTGACCGTATCTTTAACCGTGCGCATACTAAAAAGTTTTTTTGCCCCTTCTACCGGGTTTAACGCCGATAAATTAGGCTTTAATGCTTCTGTCGCCAGCACAAAACCGGCCTGTAATAACGCCGGTAATGCGGAACACACTAAGCAGAGCAGCATAAATGGAATCAGATATTTTAACCCTATCCCAAAAACGGCCAAACTGTAGTCAGCCATGCTCTGATCAAAATTATCCGCAATAATGATCTTAATTATCCCCATAAACTCATTAAATGAGCCATACGACACCAGATAGGCAATTCCTCCCAGCGTCAGGCAGGCGATAATGAGATCTTTACTTTTAAATGACTGGCCTTTTTTAGCGGAGTCTTCCAGCCGTTTTTTAGTCGGTTTTTCTGTTTTATTCGAGGACATGCGTCGCCCCTCGCTCGTAAAACCAACTGCTTAACCCTGTGGCCTGGAAAGAGAGTCGCAGTACATTGTCCGGTAGTACCGGAGAGAAATAAAGCAGCATAATTAAAACGGCAATACCGCTTTTTACCGTCAGTGAAATCGCAAAAGCGTTCATTTGCGGAGCAAAGCGCGACAATAAACCCAGGAATACTTCTGACAGCAACAGCACTAATACCACCGGACTGGCCAGAACCAAGGCGTTTTGAGCCACCTGATTAATAAACGTTAATAGCGGCGGTAATGAAGGCGTGCACTCGTTCATCGGATCGCATAGCTGATAGCTTTTATTTAACACGTCAACCATCGTGACCAGACCGCCGTTTTGTAAATAAACGACAGCGGCAAACATATTCAGGAAATTAGCCATTTCCGAGGTATCAATACCGTTTGCCGGATCGATACTACTACTTAGCGTTGCCCCTCGCTGGTTATCGATAATACAACCCAGCGCATGCATAACCCAAAAAGGCCATGACAGCAGACAGCCCAGCATGACGCCTACCGCCGCTTCTTGCAGAACTAACGGGATCATCGCCACCGATAAAAACGGCGGCGCCTCGTTCAATGCATGCGGCCATACTCCCAATGCCACCAGGATGATAATGGCGTTTCTCGGCGCACCGCTTAATACCCCGCTATTCAAAAACGGCAGGAAGAAAAAAATCGGCGCCACGCGAGCAAACCCTAGCGCCGCAGACGCAACCAGGTGATGAATTTCAAAGTACAACGCGTAAAACATTTTTTACCCCTTAGCCAACGCCAGGAATATCACCTGACGCCCGTAAGAGAGTAAAACTTCGCCATACCAGCCAGACAGTAAAAACAAGCATAAACACACGCCAAGTAATTTAATGCCAAAAGGCAGCGTCTGTTCCTGTAATTGCGTTACCGTCTGGAATAACCCTACCAGGAGGCCGATAATCGTTGCGACAATCGTCGGCCACCCTGACAGGATCAAAACAAGATAGAGCGCCTTATTACCTGCAAACACTAAATCATCCATTTAACTATCCCGTCTCGTAATGATGTCATGTTGCAATGTCCATATACTGTAATATCAATCCCTTAGACAGTAAGGTCCAGCCATCAAGCGCGACAAAAAGCACCAGCTTAATAGGTGTAGATATCGTCACCGGACTCATCATCATCATCCCCAGCGCCAGTAGCACGCTGGATACCACCAGGTCGACGACGACAAAGGGCAAATAAAGATAAAAACCAATTTTAAACGCGCTTTTTATTTCGCTCAGCGCATAAGCAGGTAATAACGCAAATATTGAAGGTTTTTCAATTTCATCTTTGTCACGCTTTACCGTCTCGGTCTCTTCTCCATACTGACGCTTCAGTTGCGCGTTTTCAAAAAACTGAACTAACTCGCGATCTGAATATTTGATCAGATAATCGCGATAACCATCCAGACCTTCATCAACGTGTTTACTTAATGATGAAATATCATTAAAGGTGACATCTTCGTCCTCAAAATAGACGTAGGCATCATGCATTATGGGCCACATAACAAACATAGAAAGCAGCAATGCGACGCCGTTAAGCGTCATATTTGAAGGTATCTGCTGTAATCCCAGGGCGTTACGCACCATGACAAATACAATAGAAAATTTAACGAAACAGGTTCCTGACGCAATAATAAATGGCAACAGGGTGGAAAATGCCAGTAAGGCAATTAATGAGATATCATTCCCCATTACCAGACTCGCTCAGCCATTCATGGATCTCAACGCCTAAGGTGTCATTCATCTGTACCAGTTCGCCATTACCCAGCAAAACACCATTCGCCATAATTTCAACGTTAAGTTCAGCATTGGTCGGCAGTGATAATAGCTGTTGCTGCCCCATGGCTTCGAGTTCGGCGAGGGTAACGTTCTTACGATACAAAACAAATTCCAGTTTGACGGGCAATTGATTCAAGCCAGGCAGAGTTTCTGCAGTTTCAGTTGTATTATTTTCTTCTTCGATATGTTGAATATCTAACGTTTCCACAATAATTCCCCCTTCAACACGGTTGAAATGACCTAACTTTTTCGCGTAGCAATAAACTTCCGCACGGGAAGTACGAATCAGGAGTACATCTCCGATCCCGATTCGGCCCAGCAACGAACGCTGCGTATCACTGCTACCGATTACAAAGCGCAACGGCCAACGCAGCATTTTCGGCCTGCCGCCCCCGACTGCAGGCAGTTCAGGAAGATGCTCAAACCACAGGCCGCCCCGATCGCTCATAATGTGCAACAATTTCCCTTCCGGCAGCGCGCTTCCCGGTACGGGGTTCTCTACGCATAAACGCCGACAGGACAAATGCGGCACGGGCAACTCAAACGGTCGCTCTGTTGCAGCAAGCCAGGGAACGACCAGGTGCTCAGCGCCAGCAGAAACCGCCGCCCCAGCCAGAGCGGGAGAGACATGCTCAAGCCAGTCCCCAGGTTTAATCCAGGCCGACCACCGTTTTTCTGCATCGCTCAACCGAACCCACATTCCCTGTCGCGTCGGATATTCCAGCGTCGCTTCCCGGCCATGGCGCTGGCATTCTGTCGCGGTTTGCGCCAATAGCCATTCGCGACGATCAATCTGTCTCACACGCAATGACATCAGGCGTCATCCTCCTCGCCAGATTGCTGTCTGTGCTGTTGCTGCTGCGGATTTTGTTGATCGTCTCGCGTCAGGTGCCAGCGCTGGGGATTACCGTTTTGCCATTGATCATGCAAACGATGTTCAACCTGCGTATTTGACGGTATTAACGAAAACTCCCCTGCTTGCCGCGCCTGAATATTGACGGAATAGTCATTTCCCCAGCGCTGAAAACGGTAAGTCAGCGAGCTATCCTCTCCTTTCACGCCATCGGCAGTGGGAAAAATAGTCATCATCGGCTTTGATTGCGCCGCTAAAGGCATTTTTTCATCGCCGCCGGTTAATTGGCTAAGATCGGCGATAGTGGTTGGTTGCAGCGGAAGCTGAGAAACATCTTTAACCTTTTTATGATCTTTATCTTCAGGCTTACCGGTATTGGCTGCGGCCATTCGGGCAGGTGCGACATCCCGCGCCAGCGGCGCGCCCTCTTTACGAACGCCTTCGCCCGCGATGGCTTTATTATCCCCAGGCAATGCCTTGATATTATCGTCAGAGATTCCCGTGGCGTTATCGGTTACTTCACTAACGGATTCCACAGCTTTTAAATCAGCAGATAATTTTTTACCGCTTACGCTTTCTAACGGCCTATTTTTAGACGATAGCAACGCTGCGGATTTATCTACTTTGGCCTCCGCAGAGATCAAACCGACAGATTTTTCAGCAGTGACTTTCAACAGTTTTTCAGCAATCCTGAGTTCGCTTTTTCCGTTATGATGCAGACCAGAAACGTTGCCATTGTGATGTTCTGATTTCGCTGGCGCGCCATGTCGCCATGCCGCCAGTAATAACGGTAAAGCCGTTTCTTTATGCATTACGAAAGCATCGCCATAGTCGCGATCTTTTTTATCACCGGAATATTCTGTCTTATGTTTTTCCACCGCTTTTTTTAATGCTTCTGATAAACCGCCAACCTCATCCTGCTGCGGCAGTAAAATGTTCCCGGATGAACTGACAGCTGACACATCGCCCATTAAATTATCTCCTCTGACTCGGCCTCTTCCTGCTGTATCTCTCGCTGGATATAGAATCTTTTCTGACGGATTATCCAGCGTTGATAGTTCCCTTCTTTGCGCAACCAATATTTACTTTTTTTCTGAAACTCTTCCCTTTTCTTTTCCAGCTCGCTCCGTTTTTCCTGAATTTGTATAATCTGGAGTTCTAAATCTTTTATCTGCCGGCGAACAATAGACTGCTTACGTAATAACGTATAAATTTCCTCACGACTGAGCTGTCTGTTTTCTGCACGCAGCGTATCTAATAACAATTTCAGACCCGCTATTTGTTCAAGGATCGCCTCCTCCTCGGCCTGCAGCCCGCGGTCCTCATCCTGATAGCGAAGTAATATCGACTCACACTGTGAATGAAATACCGTACAGCGCCGCTGCAATACTTTAATTCTGGTCAGCGAATGCATTCATACCGCTCAACGTGTCATCAAAGGATGAATACTGCGCTACCGGCTGGCATAACCAGGCTTTCAGGCTATCCCGCATCTGCATCGCCCGATCGTTATCGATATTTTCGCCAGGACGATATTCTCCCAAGTCAATGAAAAGCTGGAGCTCTTCCAAACGCGTCATTAATTTACGCACGGCAGATGCCTGTTCAGCATGTGTCGGCGTCGTGACTTGTCCAAAAACGCGGCTTACGCTTTTCAGTACATCGATTGCCGGGTAATGTCCCTGCCCGGCCAGCTTTCTGCTCAGATACAGGTGACCGTCAAGGATAGAGCGAATTTCATCCGCCATCGGGTCCGCCTCTTCCTCGCTTTCCAGCAGTACCGTATAAAAGGCAGTAATGCTTCCCTCGCTGGTCGCCCCTGGGCGTTCCAGCAAGCGGGGCAAATTATCGAATACGGAGGCGGGATAACCTCGACGAGCCGGACGCTCTCCCGACGCCAGTGCCACGTCTCGCAAAGCACGCGCATAACGGGTCATGGAATCGATAAAAAGCACGACCCGTTTTCCCTGGTCGCGAAAATATTCCGCTACGGTTGTCGCCAGTTGCGCCGCATTGCAGCGATCGACCGAGGGGAAATCGGAAGTGGCAAAAACCAGCACGCATTTTTCTTTCTTATGCGAAGCGCGCAACATATCCACGAATTCAGTGACCTCACGGCCTCGTTCACCGATAAGACCGATAACAAAGACATCCGCCTCCGTTTGCTCGATCAGCATATGCATCAGCATGGTCTTACCGCATCCTGCGGAGGCAAAAATGCCCATTCGCTGGCCTACGCCACAGGTCAATAACCCGTCAATCGCGCGCACACCGGTAATCAGCGGTTCACGGACGCCAACGCGTGAAGCGTAAGACGGCGGTGCGACATCAATAACGCGTTCTTCGCTAATCGGCGCCACTTCAGGGGTAAAACGCTCAACGATTTTCCCTGTCGGATCC'
def rest_of_ORF(dna):
   
    """ Takes a DNA sequence that is assumed to begin with a start
        codon and returns the sequence up to but not including the
        first in frame stop codon.  If there is no in frame stop codon,
        returns the whole string.

        dna: a DNA sequence
        returns: the open reading frame represented as a string
    >>> rest_of_ORF("ATGTGAA")
    'ATG'
    >>> rest_of_ORF("ATGAGATAGG")
    'ATGAGA'
    """
    #dna = 'ATGGCTAGCGATGCGAGCCCTACCGTGACCGATCCATGAGAGATGCTCTAGGCTATGAATGACGTAGCG'
    Orf1 = []
    for i in range(0, len(dna), 3 ): 
        Orf1.append( dna[0+int(i):3+int(i)] ) #APPend the 3 nucleotide code to the empty list
    #print("ORF IS ---->", Orf1, "\n")
    lis = []
    stops = ['TAG', 'TAA', 'TGA']
    for i in stops:
        if i in Orf1:
            lis.append(i)#list the stop condons in the orf
           # print("All stop codons in this orf ---->", lis, "\n")
    indexs_stops = []
    for i in lis:
        indexs_stops.append(int(Orf1.index(i)))     #append the index of stop codes and sort threm
    sorted_indexs_stops = sorted(indexs_stops)
    start = (Orf1.index("ATG")) #start index
    #print(start)
    if len(sorted_indexs_stops) >= 1: 
        stop = sorted_indexs_stops[0]#stop index
        return ("".join(Orf1[start:stop]))#prints orf
        pass
    else:
        return ("".join(Orf1[start:]))
#rest_of_ORF(dna)
rest_of_ORF(load_seq('/home/eanbit2/mscbioinfo/introtoprog/python/mini-project-eunicenjuguna/data/X73525.fa'))
''
rest_of_ORF(load_seq('/home/eanbit2/mscbioinfo/introtoprog/python/mini-project-eunicenjuguna/data/X73525.fa'))
''
def find_all_ORFs_oneframe(dna):
    """ Finds all non-nested open reading frames in the given DNA
        sequence and returns them as a list.  This function should
        only find ORFs that are in the default frame of the sequence
        (i.e. they start on indices that are multiples of 3).
        By non-nested we mean that if an ORF occurs entirely within
        another ORF, it should not be included in the returned list of ORFs.

        dna: a DNA sequence
        returns: a list of non-nested ORFs
    >>> find_all_ORFs_oneframe("ATGCATGAATGTAGATAGATGTGCCC")
    ['ATGCATGAATGTAGA', 'ATGTGCCC']
    """
    listOfOrf = list()
   # dna='ATGGCTAGCGATGCGAGCCCTACCGTGACCGATCCATGAGAGATGCTCTAGGCTATGAATGACGTAGCG'
    frames = [] # storing the default reading frame
    frames.append([dna[i:i + 3] for i in range(0, len(dna), 3)])
    #print(frames)
    for i in range(0,len(frames),1): #looping  the dna frame
        start=0
        while start <len(frames[i]): #looping the frame for start and stop codons
            if frames[i][start]=="ATG":
                for stop in range(start+1,len(frames[i]),1):
                    if frames[i][stop]=="TAA" or  frames[i][stop]=="TAG" or  frames[i][stop]=="TGA" :
                        listOfOrf.append(' '.join(frames[i][start:stop])) # retrieve the orf
                        break
                else:
                     listOfOrf.append(' '.join(frames[i][start:]))
            start+=1
    one_f_orf =(",".join(listOfOrf).replace(" ",""))
    one_f_orf = one_f_orf.split(",")
    return one_f_orf
#find_all_ORFs_oneframe(dna)
    # TODO: implement this
def find_all_ORFs(dna):
    """ Finds all non-nested open reading frames in the given DNA sequence in
        all 3 possible frames and returns them as a list.  By non-nested we
        mean that if an ORF occurs entirely within another ORF and they are
        both in the same frame, it should not be included in the returned list
        of ORFs.

        dna: a DNA sequence
        returns: a list of non-nested ORFs

    >>> find_all_ORFs("ATGCATGAATGTAG")
    ['ATGCATGAATGTAG', 'ATGAATGTAG', 'ATG']
    """
    dna='ATGGCTAGCGATGCGAGCCCTACCGTGACCGATCCATGAGAGATGCTCTAGGCTATGAATGACGTAGCG'
    listOfOrf = list()
    frames = [] # storing the three reading frames create the positive frames
    frames.append([dna[i:i + 3] for i in range(0, len(dna), 3)])
    frames.append([dna[i:i + 3] for i in range(1, len(dna), 3)])
    frames.append([dna[i:i + 3] for i in range(2, len(dna), 3)])
    #print(frames)
    for i in range(0,len(frames),1): #looping all the frames
        start=0
        while start <len(frames[i]): #looping each frame for start and stop codons
            if frames[i][start]=="ATG":
                for stop in range(start+1,len(frames[i]),1):
                    if frames[i][stop]=="TAA" or  frames[i][stop]=="TAG" or  frames[i][stop]=="TGA" :
                        listOfOrf.append(' '.join(frames[i][start:stop])) # retrieve the orf
                        break
                else:
                     listOfOrf.append(' '.join(frames[i][start:]))
            start+=1
    all_orf= ",".join(listOfOrf).replace(" ","")
    all_orf = all_orf.split(",")
    return all_orf
find_all_ORFs(dna)
['ATGGCTAGCGATGCGAGCCCTACCGTGACCGATCCA',
 'ATGCTC',
 'ATGAATGACGTAGCG',
 'ATGCGAGCCCTACCG',
 'ATGACG',
 'ATGAGAGATGCTCTAGGCTATGAA']
find_all_ORFs_oneframe(load_seq('/home/eanbit2/mscbioinfo/introtoprog/python/mini-project-eunicenjuguna/data/X73525.fa'))
['ATGCGG',
 'ATGCGTGCTGGTTTTTGCCACTTCCGATTTCCCCTCGGTCGATCGCTGCAATGCGGCGCAACTGGCGACAACCGTAGCGGAATATTTTCGCGACCAGGGAAAACGGGTCGTGCTTTTTATCGATTCCATGACCCGTTATGCGCGTGCTTTGCGAGACGTGGCACTGGCGTCGGGAGAGCGTCCGGCTCGTCGAGGTTATCCCGCCTCCGTATTCGA',
 'ATGCCAGCCGGTAGCGCAGTATTCATCCTT',
 'ATGAGGACCGCGGGCTGCAGGCCGAGGAGGAGGCGATCCTTGAACAAA',
 'ATGGCGACATGGCGCGCCAGCGAAATCAGAACATCACAATGGCAACGTTTCTGGTCTGCATCA',
 'ATGGCAAAACGG',
 'ATGTCATTGCGTGTGAGACAGATTGATCGTCGCGAATGGCTATTGGCGCAAACCGCGACAGAATGCCAGCGCCATGGCCGGGAAGCGACGCTGGAATATCCGACGCGACAGGGAATGTGGGTTCGGTTGAGCGATGCAGAAAAACGGTGGTCGGCCTGGATTAAACCTGGGGACTGGCTTGAGCATGTCTCTCCCGCTCTGGCTGGGGCGGCGGTTTCTGCTGGCGCTGAGCACCTGGTCGTTCCCTGGCTTGCTGCAACAGAGCGACCGTTTGAGTTGCCCGTGCCGCATTTGTCCTGTCGGCGTTTATGCGTAGAGAACCCCGTACCGGGAAGCGCGCTGCCGGAAGGGAAATTGTTGCACATTATGAGCGATCGGGGCGGCCTGTGGTTTGAGCATCTTCCTGAACTGCCTGCAGTCGGGGGCGGCAGGCCGAAAATGCTGCGTTGGCCGTTGCGCTTTGTAATCGGTAGCAGTGATACGCAGCGTTCGTTGCTGGGCCGAATCGGGATCGGAGATGTACTCCTGATTCGTACTTCCCGTGCGGAAGTTTATTGCTACGCGAAAAAGTTAGGTCATTTCAACCGTGTTGAAGGGGGAATTATTGTGGAAACGTTAGATATTCAACATATCGAAGAAGAAAATAATACAACTGAAACTGCAGAAACTCTGCCTGGCTTGAATCAATTGCCCGTCAAACTGGAATTTGTTTTGTATCGTAAGAACGTTACCCTCGCCGAACTCGAAGCCATGGGGCAGCAACAGCTATTATCACTGCCGACCAATGCTGAACTTAACGTTGAAATTATGGCGAATGGTGTTTTGCTGGGTAATGGCGAACTGGTACAGATGAATGACACCTTAGGCGTTGAGATCCATGAATGGCTGAGCGAGTCTGGTAATGGGGAA',
 'ATGTGGGTTCGGTTGAGCGATGCAGAAAAACGGTGGTCGGCCTGGATTAAACCTGGGGACTGGCTTGAGCATGTCTCTCCCGCTCTGGCTGGGGCGGCGGTTTCTGCTGGCGCTGAGCACCTGGTCGTTCCCTGGCTTGCTGCAACAGAGCGACCGTTTGAGTTGCCCGTGCCGCATTTGTCCTGTCGGCGTTTATGCGTAGAGAACCCCGTACCGGGAAGCGCGCTGCCGGAAGGGAAATTGTTGCACATTATGAGCGATCGGGGCGGCCTGTGGTTTGAGCATCTTCCTGAACTGCCTGCAGTCGGGGGCGGCAGGCCGAAAATGCTGCGTTGGCCGTTGCGCTTTGTAATCGGTAGCAGTGATACGCAGCGTTCGTTGCTGGGCCGAATCGGGATCGGAGATGTACTCCTGATTCGTACTTCCCGTGCGGAAGTTTATTGCTACGCGAAAAAGTTAGGTCATTTCAACCGTGTTGAAGGGGGAATTATTGTGGAAACGTTAGATATTCAACATATCGAAGAAGAAAATAATACAACTGAAACTGCAGAAACTCTGCCTGGCTTGAATCAATTGCCCGTCAAACTGGAATTTGTTTTGTATCGTAAGAACGTTACCCTCGCCGAACTCGAAGCCATGGGGCAGCAACAGCTATTATCACTGCCGACCAATGCTGAACTTAACGTTGAAATTATGGCGAATGGTGTTTTGCTGGGTAATGGCGAACTGGTACAGATGAATGACACCTTAGGCGTTGAGATCCATGAATGGCTGAGCGAGTCTGGTAATGGGGAA',
 'ATGAGCGATCGGGGCGGCCTGTGGTTTGAGCATCTTCCTGAACTGCCTGCAGTCGGGGGCGGCAGGCCGAAAATGCTGCGTTGGCCGTTGCGCTTTGTAATCGGTAGCAGTGATACGCAGCGTTCGTTGCTGGGCCGAATCGGGATCGGAGATGTACTCCTGATTCGTACTTCCCGTGCGGAAGTTTATTGCTACGCGAAAAAGTTAGGTCATTTCAACCGTGTTGAAGGGGGAATTATTGTGGAAACGTTAGATATTCAACATATCGAAGAAGAAAATAATACAACTGAAACTGCAGAAACTCTGCCTGGCTTGAATCAATTGCCCGTCAAACTGGAATTTGTTTTGTATCGTAAGAACGTTACCCTCGCCGAACTCGAAGCCATGGGGCAGCAACAGCTATTATCACTGCCGACCAATGCTGAACTTAACGTTGAAATTATGGCGAATGGTGTTTTGCTGGGTAATGGCGAACTGGTACAGATGAATGACACCTTAGGCGTTGAGATCCATGAATGGCTGAGCGAGTCTGGTAATGGGGAA',
 'ATGCTGCGTTGGCCGTTGCGCTTTGTAATCGGTAGCAGTGATACGCAGCGTTCGTTGCTGGGCCGAATCGGGATCGGAGATGTACTCCTGATTCGTACTTCCCGTGCGGAAGTTTATTGCTACGCGAAAAAGTTAGGTCATTTCAACCGTGTTGAAGGGGGAATTATTGTGGAAACGTTAGATATTCAACATATCGAAGAAGAAAATAATACAACTGAAACTGCAGAAACTCTGCCTGGCTTGAATCAATTGCCCGTCAAACTGGAATTTGTTTTGTATCGTAAGAACGTTACCCTCGCCGAACTCGAAGCCATGGGGCAGCAACAGCTATTATCACTGCCGACCAATGCTGAACTTAACGTTGAAATTATGGCGAATGGTGTTTTGCTGGGTAATGGCGAACTGGTACAGATGAATGACACCTTAGGCGTTGAGATCCATGAATGGCTGAGCGAGTCTGGTAATGGGGAA',
 'ATGGGGCAGCAACAGCTATTATCACTGCCGACCAATGCTGAACTTAACGTTGAAATTATGGCGAATGGTGTTTTGCTGGGTAATGGCGAACTGGTACAGATGAATGACACCTTAGGCGTTGAGATCCATGAATGGCTGAGCGAGTCTGGTAATGGGGAA',
 'ATGGCGAATGGTGTTTTGCTGGGTAATGGCGAACTGGTACAGATGAATGACACCTTAGGCGTTGAGATCCATGAATGGCTGAGCGAGTCTGGTAATGGGGAA',
 'ATGAATGACACCTTAGGCGTTGAGATCCATGAATGGCTGAGCGAGTCTGGTAATGGGGAA',
 'ATGACATCATTACGAGACGGGATAGTTAAATGGATGATT',
 'ATGATT',
 'ATGGCGAAGTTTTACTCTCTTACGGGCGTCAGG',
 'ATGCAT',
 'ATGCGCTGGGTTGTATTATCGATAACCAGCGAGGGGCAACGC',
 'ATGTACTGCGACTCTCTTTCCAGGCCACAGGGT',
 'ATGTCCTCGAATAAAACAGAAAAACCGACTAAAAAACGGCTGGAAGACTCCGCTAAAAAAGGCCAGTCATTTAAAAGTAAAGATCTCATTATCGCCTGCCTGACGCTGGGAGGAATTGCCTATCTGGTGTCGTATGGCTCATTTAATGAGTTTATGGGGATAATTAAGATCATTATTGCGGATAATTTTGATCAGAGCATGGCTGACTACAGTTTGGCCGTTTTTGGGATAGGGTTAAAATATCTGATTCCATTTATGCTGCTCTGCTTAGTGTGTTCCGCATTACCGGCGTTATTACAGGCCGGTTTTGTGCTGGCGACAGAAGCATTAAAGCCTAATTTATCGGCGTTAAACCCGGTAGAAGGGGCAAAAAAACTTTTTAGTATGCGCACGGTTAAAGATACGGTCAAAACCCTACTGTATCTCTCATCCTTTGTGGTGGCCGCCATCATTTGCTGGAAGAAATATAAGGTTGAAATCTTTTCTCAGCTAAATGGCAATATTGTAGGTATTGCCGTCATTTGGCGTGAACTTCTCCTCGCATTGGTATTAACTTGCCTTGCTTGCGCATTGATTGTCTTATTATTGGATGCTATTGCGGAATATTTCCTGACCATGAAAGATATGAAAATGGATAAGGAAGAAGTGAAGCGTGAAATGAAGGAGCAGGAAGGGAACCCAGAGGTTAAATCTAAAAGACGTGAAGTTCATATGGAAATTCTGTCTGAACAGGTGAAATCTGATATTGAAAACTCACGCCTGATTGTTGCCAACCCCACGCATATTACGATCGGGATTTATTTTAAACCCGAATTGATGCCGATTCCGATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGGGGATAATTAAGATCATTATTGCGGATAATTTTGATCAGAGCATGGCTGACTACAGTTTGGCCGTTTTTGGGATAGGGTTAAAATATCTGATTCCATTTATGCTGCTCTGCTTAGTGTGTTCCGCATTACCGGCGTTATTACAGGCCGGTTTTGTGCTGGCGACAGAAGCATTAAAGCCTAATTTATCGGCGTTAAACCCGGTAGAAGGGGCAAAAAAACTTTTTAGTATGCGCACGGTTAAAGATACGGTCAAAACCCTACTGTATCTCTCATCCTTTGTGGTGGCCGCCATCATTTGCTGGAAGAAATATAAGGTTGAAATCTTTTCTCAGCTAAATGGCAATATTGTAGGTATTGCCGTCATTTGGCGTGAACTTCTCCTCGCATTGGTATTAACTTGCCTTGCTTGCGCATTGATTGTCTTATTATTGGATGCTATTGCGGAATATTTCCTGACCATGAAAGATATGAAAATGGATAAGGAAGAAGTGAAGCGTGAAATGAAGGAGCAGGAAGGGAACCCAGAGGTTAAATCTAAAAGACGTGAAGTTCATATGGAAATTCTGTCTGAACAGGTGAAATCTGATATTGAAAACTCACGCCTGATTGTTGCCAACCCCACGCATATTACGATCGGGATTTATTTTAAACCCGAATTGATGCCGATTCCGATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGGCTGACTACAGTTTGGCCGTTTTTGGGATAGGGTTAAAATATCTGATTCCATTTATGCTGCTCTGCTTAGTGTGTTCCGCATTACCGGCGTTATTACAGGCCGGTTTTGTGCTGGCGACAGAAGCATTAAAGCCTAATTTATCGGCGTTAAACCCGGTAGAAGGGGCAAAAAAACTTTTTAGTATGCGCACGGTTAAAGATACGGTCAAAACCCTACTGTATCTCTCATCCTTTGTGGTGGCCGCCATCATTTGCTGGAAGAAATATAAGGTTGAAATCTTTTCTCAGCTAAATGGCAATATTGTAGGTATTGCCGTCATTTGGCGTGAACTTCTCCTCGCATTGGTATTAACTTGCCTTGCTTGCGCATTGATTGTCTTATTATTGGATGCTATTGCGGAATATTTCCTGACCATGAAAGATATGAAAATGGATAAGGAAGAAGTGAAGCGTGAAATGAAGGAGCAGGAAGGGAACCCAGAGGTTAAATCTAAAAGACGTGAAGTTCATATGGAAATTCTGTCTGAACAGGTGAAATCTGATATTGAAAACTCACGCCTGATTGTTGCCAACCCCACGCATATTACGATCGGGATTTATTTTAAACCCGAATTGATGCCGATTCCGATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGCTGCTCTGCTTAGTGTGTTCCGCATTACCGGCGTTATTACAGGCCGGTTTTGTGCTGGCGACAGAAGCATTAAAGCCTAATTTATCGGCGTTAAACCCGGTAGAAGGGGCAAAAAAACTTTTTAGTATGCGCACGGTTAAAGATACGGTCAAAACCCTACTGTATCTCTCATCCTTTGTGGTGGCCGCCATCATTTGCTGGAAGAAATATAAGGTTGAAATCTTTTCTCAGCTAAATGGCAATATTGTAGGTATTGCCGTCATTTGGCGTGAACTTCTCCTCGCATTGGTATTAACTTGCCTTGCTTGCGCATTGATTGTCTTATTATTGGATGCTATTGCGGAATATTTCCTGACCATGAAAGATATGAAAATGGATAAGGAAGAAGTGAAGCGTGAAATGAAGGAGCAGGAAGGGAACCCAGAGGTTAAATCTAAAAGACGTGAAGTTCATATGGAAATTCTGTCTGAACAGGTGAAATCTGATATTGAAAACTCACGCCTGATTGTTGCCAACCCCACGCATATTACGATCGGGATTTATTTTAAACCCGAATTGATGCCGATTCCGATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGCGCACGGTTAAAGATACGGTCAAAACCCTACTGTATCTCTCATCCTTTGTGGTGGCCGCCATCATTTGCTGGAAGAAATATAAGGTTGAAATCTTTTCTCAGCTAAATGGCAATATTGTAGGTATTGCCGTCATTTGGCGTGAACTTCTCCTCGCATTGGTATTAACTTGCCTTGCTTGCGCATTGATTGTCTTATTATTGGATGCTATTGCGGAATATTTCCTGACCATGAAAGATATGAAAATGGATAAGGAAGAAGTGAAGCGTGAAATGAAGGAGCAGGAAGGGAACCCAGAGGTTAAATCTAAAAGACGTGAAGTTCATATGGAAATTCTGTCTGAACAGGTGAAATCTGATATTGAAAACTCACGCCTGATTGTTGCCAACCCCACGCATATTACGATCGGGATTTATTTTAAACCCGAATTGATGCCGATTCCGATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGAAAGATATGAAAATGGATAAGGAAGAAGTGAAGCGTGAAATGAAGGAGCAGGAAGGGAACCCAGAGGTTAAATCTAAAAGACGTGAAGTTCATATGGAAATTCTGTCTGAACAGGTGAAATCTGATATTGAAAACTCACGCCTGATTGTTGCCAACCCCACGCATATTACGATCGGGATTTATTTTAAACCCGAATTGATGCCGATTCCGATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGAAAATGGATAAGGAAGAAGTGAAGCGTGAAATGAAGGAGCAGGAAGGGAACCCAGAGGTTAAATCTAAAAGACGTGAAGTTCATATGGAAATTCTGTCTGAACAGGTGAAATCTGATATTGAAAACTCACGCCTGATTGTTGCCAACCCCACGCATATTACGATCGGGATTTATTTTAAACCCGAATTGATGCCGATTCCGATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGGATAAGGAAGAAGTGAAGCGTGAAATGAAGGAGCAGGAAGGGAACCCAGAGGTTAAATCTAAAAGACGTGAAGTTCATATGGAAATTCTGTCTGAACAGGTGAAATCTGATATTGAAAACTCACGCCTGATTGTTGCCAACCCCACGCATATTACGATCGGGATTTATTTTAAACCCGAATTGATGCCGATTCCGATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGAAGGAGCAGGAAGGGAACCCAGAGGTTAAATCTAAAAGACGTGAAGTTCATATGGAAATTCTGTCTGAACAGGTGAAATCTGATATTGAAAACTCACGCCTGATTGTTGCCAACCCCACGCATATTACGATCGGGATTTATTTTAAACCCGAATTGATGCCGATTCCGATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGGAAATTCTGTCTGAACAGGTGAAATCTGATATTGAAAACTCACGCCTGATTGTTGCCAACCCCACGCATATTACGATCGGGATTTATTTTAAACCCGAATTGATGCCGATTCCGATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGCCGATTCCGATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGTCAGCGAAGAACGTGTTGCGGAAA',
 'ATGCCGTTAGTGAAGGCGCCACGC',
 'ATGGGATCC']
def find_all_ORFs_both_strands(dna):
    """ Finds all non-nested open reading frames in the given DNA sequence on both
        strands.
ATGAGGCTCAGGGATGATCTTGGGTTTTGTAATGGTCGCTGTACGATTATGATCG


        dna: a DNA sequence
        returns: a list of non-nested ORFs
    >>> find_all_ORFs_both_strands("ATGCGAATGTAGCATCAAA")
    ['ATGCGAATG', 'ATGCTACATTCGCAT']
    """
    #dna='ATGGCTAGCGATGCGAGCCCTACCGTGACCGATCCATGAGAGATGCTCTAGGCTATGAATGACGTAGCG'
    listOfOrf = list()
    frames = [] # storing the six frames that would be extacted from the fragments
    reverseCdna = [] # storing the reverse compliments
    frames.append([dna[i:i + 3] for i in range(0, len(dna), 3)])
    frames.append([dna[i:i + 3] for i in range(1, len(dna), 3)])
    frames.append([dna[i:i + 3] for i in range(2, len(dna), 3)])
    # reverse compliment of the fragment
    reverse = {"A": "T", "C": "G", "T": "A", "G": "C"}
    for i in range(len(dna)):
        reverseCdna.append(reverse[dna[-i - 1]]) if dna[-i - 1] in reverse.keys() else reverseCdna.append(dna[-i - 1])  # if any  contamination found we keep it for further more check
    reverseCdna = ''.join(reverseCdna) # joining
    # create the reverse complement  frames
    frames.append([reverseCdna[i:i + 3] for i in range(0, len(reverseCdna), 3)])
    frames.append([reverseCdna[i:i + 3] for i in range(1, len(reverseCdna), 3)])
    frames.append([reverseCdna[i:i + 3] for i in range(2, len(reverseCdna), 3)])
    #print(frames)
    #print(reverseCdna)
    for i in range(0,len(frames),1): #looping all the frames
        start=0
        while start <len(frames[i]): #looping each frame for start and stop codons
            if frames[i][start]=="ATG":
                for stop in range(start+1,len(frames[i]),1):
                    if frames[i][stop]=="TAA" or  frames[i][stop]=="TAG" or  frames[i][stop]=="TGA" :
                        listOfOrf.append(' '.join(frames[i][start:stop])) # retrieve the orf
                        break
                else:
                     listOfOrf.append(' '.join(frames[i][start:]))
            start+=1
    my_string =",".join(listOfOrf).replace(" ","")
    my_list = my_string.split(",")
    return my_list
#find_all_ORFs_both_strands(dna)
find_all_ORFs_both_strands(load_seq('/home/eanbit2/mscbioinfo/introtoprog/python/mini-project-eunicenjuguna/data/X73525.fa'))
['ATGCGG',
 'ATGCGTGCTGGTTTTTGCCACTTCCGATTTCCCCTCGGTCGATCGCTGCAATGCGGCGCAACTGGCGACAACCGTAGCGGAATATTTTCGCGACCAGGGAAAACGGGTCGTGCTTTTTATCGATTCCATGACCCGTTATGCGCGTGCTTTGCGAGACGTGGCACTGGCGTCGGGAGAGCGTCCGGCTCGTCGAGGTTATCCCGCCTCCGTATTCGA',
 'ATGCCAGCCGGTAGCGCAGTATTCATCCTT',
 'ATGAGGACCGCGGGCTGCAGGCCGAGGAGGAGGCGATCCTTGAACAAA',
 'ATGGCGACATGGCGCGCCAGCGAAATCAGAACATCACAATGGCAACGTTTCTGGTCTGCATCA',
 'ATGGCAAAACGG',
 'ATGTCATTGCGTGTGAGACAGATTGATCGTCGCGAATGGCTATTGGCGCAAACCGCGACAGAATGCCAGCGCCATGGCCGGGAAGCGACGCTGGAATATCCGACGCGACAGGGAATGTGGGTTCGGTTGAGCGATGCAGAAAAACGGTGGTCGGCCTGGATTAAACCTGGGGACTGGCTTGAGCATGTCTCTCCCGCTCTGGCTGGGGCGGCGGTTTCTGCTGGCGCTGAGCACCTGGTCGTTCCCTGGCTTGCTGCAACAGAGCGACCGTTTGAGTTGCCCGTGCCGCATTTGTCCTGTCGGCGTTTATGCGTAGAGAACCCCGTACCGGGAAGCGCGCTGCCGGAAGGGAAATTGTTGCACATTATGAGCGATCGGGGCGGCCTGTGGTTTGAGCATCTTCCTGAACTGCCTGCAGTCGGGGGCGGCAGGCCGAAAATGCTGCGTTGGCCGTTGCGCTTTGTAATCGGTAGCAGTGATACGCAGCGTTCGTTGCTGGGCCGAATCGGGATCGGAGATGTACTCCTGATTCGTACTTCCCGTGCGGAAGTTTATTGCTACGCGAAAAAGTTAGGTCATTTCAACCGTGTTGAAGGGGGAATTATTGTGGAAACGTTAGATATTCAACATATCGAAGAAGAAAATAATACAACTGAAACTGCAGAAACTCTGCCTGGCTTGAATCAATTGCCCGTCAAACTGGAATTTGTTTTGTATCGTAAGAACGTTACCCTCGCCGAACTCGAAGCCATGGGGCAGCAACAGCTATTATCACTGCCGACCAATGCTGAACTTAACGTTGAAATTATGGCGAATGGTGTTTTGCTGGGTAATGGCGAACTGGTACAGATGAATGACACCTTAGGCGTTGAGATCCATGAATGGCTGAGCGAGTCTGGTAATGGGGAA',
 'ATGTGGGTTCGGTTGAGCGATGCAGAAAAACGGTGGTCGGCCTGGATTAAACCTGGGGACTGGCTTGAGCATGTCTCTCCCGCTCTGGCTGGGGCGGCGGTTTCTGCTGGCGCTGAGCACCTGGTCGTTCCCTGGCTTGCTGCAACAGAGCGACCGTTTGAGTTGCCCGTGCCGCATTTGTCCTGTCGGCGTTTATGCGTAGAGAACCCCGTACCGGGAAGCGCGCTGCCGGAAGGGAAATTGTTGCACATTATGAGCGATCGGGGCGGCCTGTGGTTTGAGCATCTTCCTGAACTGCCTGCAGTCGGGGGCGGCAGGCCGAAAATGCTGCGTTGGCCGTTGCGCTTTGTAATCGGTAGCAGTGATACGCAGCGTTCGTTGCTGGGCCGAATCGGGATCGGAGATGTACTCCTGATTCGTACTTCCCGTGCGGAAGTTTATTGCTACGCGAAAAAGTTAGGTCATTTCAACCGTGTTGAAGGGGGAATTATTGTGGAAACGTTAGATATTCAACATATCGAAGAAGAAAATAATACAACTGAAACTGCAGAAACTCTGCCTGGCTTGAATCAATTGCCCGTCAAACTGGAATTTGTTTTGTATCGTAAGAACGTTACCCTCGCCGAACTCGAAGCCATGGGGCAGCAACAGCTATTATCACTGCCGACCAATGCTGAACTTAACGTTGAAATTATGGCGAATGGTGTTTTGCTGGGTAATGGCGAACTGGTACAGATGAATGACACCTTAGGCGTTGAGATCCATGAATGGCTGAGCGAGTCTGGTAATGGGGAA',
 'ATGAGCGATCGGGGCGGCCTGTGGTTTGAGCATCTTCCTGAACTGCCTGCAGTCGGGGGCGGCAGGCCGAAAATGCTGCGTTGGCCGTTGCGCTTTGTAATCGGTAGCAGTGATACGCAGCGTTCGTTGCTGGGCCGAATCGGGATCGGAGATGTACTCCTGATTCGTACTTCCCGTGCGGAAGTTTATTGCTACGCGAAAAAGTTAGGTCATTTCAACCGTGTTGAAGGGGGAATTATTGTGGAAACGTTAGATATTCAACATATCGAAGAAGAAAATAATACAACTGAAACTGCAGAAACTCTGCCTGGCTTGAATCAATTGCCCGTCAAACTGGAATTTGTTTTGTATCGTAAGAACGTTACCCTCGCCGAACTCGAAGCCATGGGGCAGCAACAGCTATTATCACTGCCGACCAATGCTGAACTTAACGTTGAAATTATGGCGAATGGTGTTTTGCTGGGTAATGGCGAACTGGTACAGATGAATGACACCTTAGGCGTTGAGATCCATGAATGGCTGAGCGAGTCTGGTAATGGGGAA',
 'ATGCTGCGTTGGCCGTTGCGCTTTGTAATCGGTAGCAGTGATACGCAGCGTTCGTTGCTGGGCCGAATCGGGATCGGAGATGTACTCCTGATTCGTACTTCCCGTGCGGAAGTTTATTGCTACGCGAAAAAGTTAGGTCATTTCAACCGTGTTGAAGGGGGAATTATTGTGGAAACGTTAGATATTCAACATATCGAAGAAGAAAATAATACAACTGAAACTGCAGAAACTCTGCCTGGCTTGAATCAATTGCCCGTCAAACTGGAATTTGTTTTGTATCGTAAGAACGTTACCCTCGCCGAACTCGAAGCCATGGGGCAGCAACAGCTATTATCACTGCCGACCAATGCTGAACTTAACGTTGAAATTATGGCGAATGGTGTTTTGCTGGGTAATGGCGAACTGGTACAGATGAATGACACCTTAGGCGTTGAGATCCATGAATGGCTGAGCGAGTCTGGTAATGGGGAA',
 'ATGGGGCAGCAACAGCTATTATCACTGCCGACCAATGCTGAACTTAACGTTGAAATTATGGCGAATGGTGTTTTGCTGGGTAATGGCGAACTGGTACAGATGAATGACACCTTAGGCGTTGAGATCCATGAATGGCTGAGCGAGTCTGGTAATGGGGAA',
 'ATGGCGAATGGTGTTTTGCTGGGTAATGGCGAACTGGTACAGATGAATGACACCTTAGGCGTTGAGATCCATGAATGGCTGAGCGAGTCTGGTAATGGGGAA',
 'ATGAATGACACCTTAGGCGTTGAGATCCATGAATGGCTGAGCGAGTCTGGTAATGGGGAA',
 'ATGACATCATTACGAGACGGGATAGTTAAATGGATGATT',
 'ATGATT',
 'ATGGCGAAGTTTTACTCTCTTACGGGCGTCAGG',
 'ATGCAT',
 'ATGCGCTGGGTTGTATTATCGATAACCAGCGAGGGGCAACGC',
 'ATGTACTGCGACTCTCTTTCCAGGCCACAGGGT',
 'ATGTCCTCGAATAAAACAGAAAAACCGACTAAAAAACGGCTGGAAGACTCCGCTAAAAAAGGCCAGTCATTTAAAAGTAAAGATCTCATTATCGCCTGCCTGACGCTGGGAGGAATTGCCTATCTGGTGTCGTATGGCTCATTTAATGAGTTTATGGGGATAATTAAGATCATTATTGCGGATAATTTTGATCAGAGCATGGCTGACTACAGTTTGGCCGTTTTTGGGATAGGGTTAAAATATCTGATTCCATTTATGCTGCTCTGCTTAGTGTGTTCCGCATTACCGGCGTTATTACAGGCCGGTTTTGTGCTGGCGACAGAAGCATTAAAGCCTAATTTATCGGCGTTAAACCCGGTAGAAGGGGCAAAAAAACTTTTTAGTATGCGCACGGTTAAAGATACGGTCAAAACCCTACTGTATCTCTCATCCTTTGTGGTGGCCGCCATCATTTGCTGGAAGAAATATAAGGTTGAAATCTTTTCTCAGCTAAATGGCAATATTGTAGGTATTGCCGTCATTTGGCGTGAACTTCTCCTCGCATTGGTATTAACTTGCCTTGCTTGCGCATTGATTGTCTTATTATTGGATGCTATTGCGGAATATTTCCTGACCATGAAAGATATGAAAATGGATAAGGAAGAAGTGAAGCGTGAAATGAAGGAGCAGGAAGGGAACCCAGAGGTTAAATCTAAAAGACGTGAAGTTCATATGGAAATTCTGTCTGAACAGGTGAAATCTGATATTGAAAACTCACGCCTGATTGTTGCCAACCCCACGCATATTACGATCGGGATTTATTTTAAACCCGAATTGATGCCGATTCCGATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGGGGATAATTAAGATCATTATTGCGGATAATTTTGATCAGAGCATGGCTGACTACAGTTTGGCCGTTTTTGGGATAGGGTTAAAATATCTGATTCCATTTATGCTGCTCTGCTTAGTGTGTTCCGCATTACCGGCGTTATTACAGGCCGGTTTTGTGCTGGCGACAGAAGCATTAAAGCCTAATTTATCGGCGTTAAACCCGGTAGAAGGGGCAAAAAAACTTTTTAGTATGCGCACGGTTAAAGATACGGTCAAAACCCTACTGTATCTCTCATCCTTTGTGGTGGCCGCCATCATTTGCTGGAAGAAATATAAGGTTGAAATCTTTTCTCAGCTAAATGGCAATATTGTAGGTATTGCCGTCATTTGGCGTGAACTTCTCCTCGCATTGGTATTAACTTGCCTTGCTTGCGCATTGATTGTCTTATTATTGGATGCTATTGCGGAATATTTCCTGACCATGAAAGATATGAAAATGGATAAGGAAGAAGTGAAGCGTGAAATGAAGGAGCAGGAAGGGAACCCAGAGGTTAAATCTAAAAGACGTGAAGTTCATATGGAAATTCTGTCTGAACAGGTGAAATCTGATATTGAAAACTCACGCCTGATTGTTGCCAACCCCACGCATATTACGATCGGGATTTATTTTAAACCCGAATTGATGCCGATTCCGATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGGCTGACTACAGTTTGGCCGTTTTTGGGATAGGGTTAAAATATCTGATTCCATTTATGCTGCTCTGCTTAGTGTGTTCCGCATTACCGGCGTTATTACAGGCCGGTTTTGTGCTGGCGACAGAAGCATTAAAGCCTAATTTATCGGCGTTAAACCCGGTAGAAGGGGCAAAAAAACTTTTTAGTATGCGCACGGTTAAAGATACGGTCAAAACCCTACTGTATCTCTCATCCTTTGTGGTGGCCGCCATCATTTGCTGGAAGAAATATAAGGTTGAAATCTTTTCTCAGCTAAATGGCAATATTGTAGGTATTGCCGTCATTTGGCGTGAACTTCTCCTCGCATTGGTATTAACTTGCCTTGCTTGCGCATTGATTGTCTTATTATTGGATGCTATTGCGGAATATTTCCTGACCATGAAAGATATGAAAATGGATAAGGAAGAAGTGAAGCGTGAAATGAAGGAGCAGGAAGGGAACCCAGAGGTTAAATCTAAAAGACGTGAAGTTCATATGGAAATTCTGTCTGAACAGGTGAAATCTGATATTGAAAACTCACGCCTGATTGTTGCCAACCCCACGCATATTACGATCGGGATTTATTTTAAACCCGAATTGATGCCGATTCCGATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGCTGCTCTGCTTAGTGTGTTCCGCATTACCGGCGTTATTACAGGCCGGTTTTGTGCTGGCGACAGAAGCATTAAAGCCTAATTTATCGGCGTTAAACCCGGTAGAAGGGGCAAAAAAACTTTTTAGTATGCGCACGGTTAAAGATACGGTCAAAACCCTACTGTATCTCTCATCCTTTGTGGTGGCCGCCATCATTTGCTGGAAGAAATATAAGGTTGAAATCTTTTCTCAGCTAAATGGCAATATTGTAGGTATTGCCGTCATTTGGCGTGAACTTCTCCTCGCATTGGTATTAACTTGCCTTGCTTGCGCATTGATTGTCTTATTATTGGATGCTATTGCGGAATATTTCCTGACCATGAAAGATATGAAAATGGATAAGGAAGAAGTGAAGCGTGAAATGAAGGAGCAGGAAGGGAACCCAGAGGTTAAATCTAAAAGACGTGAAGTTCATATGGAAATTCTGTCTGAACAGGTGAAATCTGATATTGAAAACTCACGCCTGATTGTTGCCAACCCCACGCATATTACGATCGGGATTTATTTTAAACCCGAATTGATGCCGATTCCGATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGCGCACGGTTAAAGATACGGTCAAAACCCTACTGTATCTCTCATCCTTTGTGGTGGCCGCCATCATTTGCTGGAAGAAATATAAGGTTGAAATCTTTTCTCAGCTAAATGGCAATATTGTAGGTATTGCCGTCATTTGGCGTGAACTTCTCCTCGCATTGGTATTAACTTGCCTTGCTTGCGCATTGATTGTCTTATTATTGGATGCTATTGCGGAATATTTCCTGACCATGAAAGATATGAAAATGGATAAGGAAGAAGTGAAGCGTGAAATGAAGGAGCAGGAAGGGAACCCAGAGGTTAAATCTAAAAGACGTGAAGTTCATATGGAAATTCTGTCTGAACAGGTGAAATCTGATATTGAAAACTCACGCCTGATTGTTGCCAACCCCACGCATATTACGATCGGGATTTATTTTAAACCCGAATTGATGCCGATTCCGATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGAAAGATATGAAAATGGATAAGGAAGAAGTGAAGCGTGAAATGAAGGAGCAGGAAGGGAACCCAGAGGTTAAATCTAAAAGACGTGAAGTTCATATGGAAATTCTGTCTGAACAGGTGAAATCTGATATTGAAAACTCACGCCTGATTGTTGCCAACCCCACGCATATTACGATCGGGATTTATTTTAAACCCGAATTGATGCCGATTCCGATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGAAAATGGATAAGGAAGAAGTGAAGCGTGAAATGAAGGAGCAGGAAGGGAACCCAGAGGTTAAATCTAAAAGACGTGAAGTTCATATGGAAATTCTGTCTGAACAGGTGAAATCTGATATTGAAAACTCACGCCTGATTGTTGCCAACCCCACGCATATTACGATCGGGATTTATTTTAAACCCGAATTGATGCCGATTCCGATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGGATAAGGAAGAAGTGAAGCGTGAAATGAAGGAGCAGGAAGGGAACCCAGAGGTTAAATCTAAAAGACGTGAAGTTCATATGGAAATTCTGTCTGAACAGGTGAAATCTGATATTGAAAACTCACGCCTGATTGTTGCCAACCCCACGCATATTACGATCGGGATTTATTTTAAACCCGAATTGATGCCGATTCCGATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGAAGGAGCAGGAAGGGAACCCAGAGGTTAAATCTAAAAGACGTGAAGTTCATATGGAAATTCTGTCTGAACAGGTGAAATCTGATATTGAAAACTCACGCCTGATTGTTGCCAACCCCACGCATATTACGATCGGGATTTATTTTAAACCCGAATTGATGCCGATTCCGATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGGAAATTCTGTCTGAACAGGTGAAATCTGATATTGAAAACTCACGCCTGATTGTTGCCAACCCCACGCATATTACGATCGGGATTTATTTTAAACCCGAATTGATGCCGATTCCGATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGCCGATTCCGATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGATCTCGGTGTATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTGTGATCGTCGATATCAAACTGGCGCGCAGTCTTTTCAAAACCCATCGCCGTTATGATCTGGTGAGTCTGGAAGAAATTGATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGGTAGAAAACGCGGGCAAAGACGTTATTCAGCCACAAGAAAACGAGGTACGGCAT',
 'ATGTCAGCGAAGAACGTGTTGCGGAAA',
 'ATGCCGTTAGTGAAGGCGCCACGC',
 'ATGGGATCC',
 'ATGGGCATTTTTGCCTCCGCAGGATGCGGTAAGACCATGCTGATGCATATGCTGATCGAGCAAACGGAGGCGGATGTCTTTGTTATCGGTCTTATCGGTGAACGAGGCCGTGAGGTCACTGAATTCGTGGATATGTTGCGCGCTTCGCATAAGAAAGAAAAATGCGTGCTGGTTTTTGCCACTTCCGATTTCCCCTCGGTCGATCGCTGCAATGCGGCGCAACTGGCGACAACCGTAGCGGAATATTTTCGCGACCAGGGAAAACGGGTCGTGCTTTTTATCGATTCCATGACCCGTTATGCGCGTGCTTTGCGAGACGTGGCACTGGCGTCGGGAGAGCGTCCGGCTCGTCGAGGTTATCCCGCCTCCGTATTCGATAATTTGCCCCGCTTGCTGGAACGCCCAGGGGCGACCAGCGAGGGAAGCATTACTGCCTTTTATACGGTACTGCTGGAAAGCGAGGAAGAGGCGGACCCGATGGCGGATGAAATTCGCTCTATCCTTGACGGTCACCTGTATCTGAGCAGAAAGCTGGCCGGGCAGGGACATTACCCGGCAATCGATGTACTGAAAAGCGTAAGCCGCGTTTTTGGACAAGTCACGACGCCGACACATGCTGAACAGGCATCTGCCGTGCGTAAATTAATGACGCGTTTGGAAGAGCTCCAGCTTTTCATTGACTTGGGAGAATATCGTCCTGGCGAAAATATCGATAACGATCGGGCGATGCAGATGCGGGATAGCCTGAAAGCCTGGTTATGCCAGCCGGTAGCGCAGTATTCATCCTTTGATGACACGTTGAGCGGTATGAATGCATTCGCTGACCAGAAT',
 'ATGCTGATGCATATGCTGATCGAGCAAACGGAGGCGGATGTCTTTGTTATCGGTCTTATCGGTGAACGAGGCCGTGAGGTCACTGAATTCGTGGATATGTTGCGCGCTTCGCATAAGAAAGAAAAATGCGTGCTGGTTTTTGCCACTTCCGATTTCCCCTCGGTCGATCGCTGCAATGCGGCGCAACTGGCGACAACCGTAGCGGAATATTTTCGCGACCAGGGAAAACGGGTCGTGCTTTTTATCGATTCCATGACCCGTTATGCGCGTGCTTTGCGAGACGTGGCACTGGCGTCGGGAGAGCGTCCGGCTCGTCGAGGTTATCCCGCCTCCGTATTCGATAATTTGCCCCGCTTGCTGGAACGCCCAGGGGCGACCAGCGAGGGAAGCATTACTGCCTTTTATACGGTACTGCTGGAAAGCGAGGAAGAGGCGGACCCGATGGCGGATGAAATTCGCTCTATCCTTGACGGTCACCTGTATCTGAGCAGAAAGCTGGCCGGGCAGGGACATTACCCGGCAATCGATGTACTGAAAAGCGTAAGCCGCGTTTTTGGACAAGTCACGACGCCGACACATGCTGAACAGGCATCTGCCGTGCGTAAATTAATGACGCGTTTGGAAGAGCTCCAGCTTTTCATTGACTTGGGAGAATATCGTCCTGGCGAAAATATCGATAACGATCGGGCGATGCAGATGCGGGATAGCCTGAAAGCCTGGTTATGCCAGCCGGTAGCGCAGTATTCATCCTTTGATGACACGTTGAGCGGTATGAATGCATTCGCTGACCAGAAT',
 'ATGCATATGCTGATCGAGCAAACGGAGGCGGATGTCTTTGTTATCGGTCTTATCGGTGAACGAGGCCGTGAGGTCACTGAATTCGTGGATATGTTGCGCGCTTCGCATAAGAAAGAAAAATGCGTGCTGGTTTTTGCCACTTCCGATTTCCCCTCGGTCGATCGCTGCAATGCGGCGCAACTGGCGACAACCGTAGCGGAATATTTTCGCGACCAGGGAAAACGGGTCGTGCTTTTTATCGATTCCATGACCCGTTATGCGCGTGCTTTGCGAGACGTGGCACTGGCGTCGGGAGAGCGTCCGGCTCGTCGAGGTTATCCCGCCTCCGTATTCGATAATTTGCCCCGCTTGCTGGAACGCCCAGGGGCGACCAGCGAGGGAAGCATTACTGCCTTTTATACGGTACTGCTGGAAAGCGAGGAAGAGGCGGACCCGATGGCGGATGAAATTCGCTCTATCCTTGACGGTCACCTGTATCTGAGCAGAAAGCTGGCCGGGCAGGGACATTACCCGGCAATCGATGTACTGAAAAGCGTAAGCCGCGTTTTTGGACAAGTCACGACGCCGACACATGCTGAACAGGCATCTGCCGTGCGTAAATTAATGACGCGTTTGGAAGAGCTCCAGCTTTTCATTGACTTGGGAGAATATCGTCCTGGCGAAAATATCGATAACGATCGGGCGATGCAGATGCGGGATAGCCTGAAAGCCTGGTTATGCCAGCCGGTAGCGCAGTATTCATCCTTTGATGACACGTTGAGCGGTATGAATGCATTCGCTGACCAGAAT',
 'ATGCTGATCGAGCAAACGGAGGCGGATGTCTTTGTTATCGGTCTTATCGGTGAACGAGGCCGTGAGGTCACTGAATTCGTGGATATGTTGCGCGCTTCGCATAAGAAAGAAAAATGCGTGCTGGTTTTTGCCACTTCCGATTTCCCCTCGGTCGATCGCTGCAATGCGGCGCAACTGGCGACAACCGTAGCGGAATATTTTCGCGACCAGGGAAAACGGGTCGTGCTTTTTATCGATTCCATGACCCGTTATGCGCGTGCTTTGCGAGACGTGGCACTGGCGTCGGGAGAGCGTCCGGCTCGTCGAGGTTATCCCGCCTCCGTATTCGATAATTTGCCCCGCTTGCTGGAACGCCCAGGGGCGACCAGCGAGGGAAGCATTACTGCCTTTTATACGGTACTGCTGGAAAGCGAGGAAGAGGCGGACCCGATGGCGGATGAAATTCGCTCTATCCTTGACGGTCACCTGTATCTGAGCAGAAAGCTGGCCGGGCAGGGACATTACCCGGCAATCGATGTACTGAAAAGCGTAAGCCGCGTTTTTGGACAAGTCACGACGCCGACACATGCTGAACAGGCATCTGCCGTGCGTAAATTAATGACGCGTTTGGAAGAGCTCCAGCTTTTCATTGACTTGGGAGAATATCGTCCTGGCGAAAATATCGATAACGATCGGGCGATGCAGATGCGGGATAGCCTGAAAGCCTGGTTATGCCAGCCGGTAGCGCAGTATTCATCCTTTGATGACACGTTGAGCGGTATGAATGCATTCGCTGACCAGAAT',
 'ATGTTGCGCGCTTCGCATAAGAAAGAAAAATGCGTGCTGGTTTTTGCCACTTCCGATTTCCCCTCGGTCGATCGCTGCAATGCGGCGCAACTGGCGACAACCGTAGCGGAATATTTTCGCGACCAGGGAAAACGGGTCGTGCTTTTTATCGATTCCATGACCCGTTATGCGCGTGCTTTGCGAGACGTGGCACTGGCGTCGGGAGAGCGTCCGGCTCGTCGAGGTTATCCCGCCTCCGTATTCGATAATTTGCCCCGCTTGCTGGAACGCCCAGGGGCGACCAGCGAGGGAAGCATTACTGCCTTTTATACGGTACTGCTGGAAAGCGAGGAAGAGGCGGACCCGATGGCGGATGAAATTCGCTCTATCCTTGACGGTCACCTGTATCTGAGCAGAAAGCTGGCCGGGCAGGGACATTACCCGGCAATCGATGTACTGAAAAGCGTAAGCCGCGTTTTTGGACAAGTCACGACGCCGACACATGCTGAACAGGCATCTGCCGTGCGTAAATTAATGACGCGTTTGGAAGAGCTCCAGCTTTTCATTGACTTGGGAGAATATCGTCCTGGCGAAAATATCGATAACGATCGGGCGATGCAGATGCGGGATAGCCTGAAAGCCTGGTTATGCCAGCCGGTAGCGCAGTATTCATCCTTTGATGACACGTTGAGCGGTATGAATGCATTCGCTGACCAGAAT',
 'ATGACCCGTTATGCGCGTGCTTTGCGAGACGTGGCACTGGCGTCGGGAGAGCGTCCGGCTCGTCGAGGTTATCCCGCCTCCGTATTCGATAATTTGCCCCGCTTGCTGGAACGCCCAGGGGCGACCAGCGAGGGAAGCATTACTGCCTTTTATACGGTACTGCTGGAAAGCGAGGAAGAGGCGGACCCGATGGCGGATGAAATTCGCTCTATCCTTGACGGTCACCTGTATCTGAGCAGAAAGCTGGCCGGGCAGGGACATTACCCGGCAATCGATGTACTGAAAAGCGTAAGCCGCGTTTTTGGACAAGTCACGACGCCGACACATGCTGAACAGGCATCTGCCGTGCGTAAATTAATGACGCGTTTGGAAGAGCTCCAGCTTTTCATTGACTTGGGAGAATATCGTCCTGGCGAAAATATCGATAACGATCGGGCGATGCAGATGCGGGATAGCCTGAAAGCCTGGTTATGCCAGCCGGTAGCGCAGTATTCATCCTTTGATGACACGTTGAGCGGTATGAATGCATTCGCTGACCAGAAT',
 'ATGGCGGATGAAATTCGCTCTATCCTTGACGGTCACCTGTATCTGAGCAGAAAGCTGGCCGGGCAGGGACATTACCCGGCAATCGATGTACTGAAAAGCGTAAGCCGCGTTTTTGGACAAGTCACGACGCCGACACATGCTGAACAGGCATCTGCCGTGCGTAAATTAATGACGCGTTTGGAAGAGCTCCAGCTTTTCATTGACTTGGGAGAATATCGTCCTGGCGAAAATATCGATAACGATCGGGCGATGCAGATGCGGGATAGCCTGAAAGCCTGGTTATGCCAGCCGGTAGCGCAGTATTCATCCTTTGATGACACGTTGAGCGGTATGAATGCATTCGCTGACCAGAAT',
 'ATGACGCGTTTGGAAGAGCTCCAGCTTTTCATTGACTTGGGAGAATATCGTCCTGGCGAAAATATCGATAACGATCGGGCGATGCAGATGCGGGATAGCCTGAAAGCCTGGTTATGCCAGCCGGTAGCGCAGTATTCATCCTTTGATGACACGTTGAGCGGTATGAATGCATTCGCTGACCAGAAT',
 'ATGCAGATGCGGGATAGCCTGAAAGCCTGGTTATGCCAGCCGGTAGCGCAGTATTCATCCTTTGATGACACGTTGAGCGGTATGAATGCATTCGCTGACCAGAAT',
 'ATGCGGGATAGCCTGAAAGCCTGGTTATGCCAGCCGGTAGCGCAGTATTCATCCTTTGATGACACGTTGAGCGGTATGAATGCATTCGCTGACCAGAAT',
 'ATGAATGCATTCGCTGACCAGAAT',
 'ATGGGCGATGTGTCAGCTGTCAGTTCATCCGGGAACATTTTACTGCCGCAGCAGGATGAGGTTGGCGGTTTATCAGAAGCATTAAAAAAAGCGGTGGAAAAACATAAGACAGAATATTCCGGTGATAAAAAAGATCGCGACTATGGCGATGCTTTCGTAATGCATAAAGAAACGGCTTTACCGTTATTACTGGCGGCATGGCGACATGGCGCGCCAGCGAAATCAGAACATCACAATGGCAACGTTTCTGGTCTGCATCATAACGGAAAAAGCGAACTCAGGATTGCTGAAAAACTGTTGAAAGTCACTGCTGAAAAATCTGTCGGTTTGATCTCTGCGGAGGCCAAAGTAGATAAATCCGCAGCGTTGCTATCGTCTAAAAATAGGCCGTTAGAAAGCGTAAGCGGTAAAAAATTATCTGCTGATTTAAAAGCTGTGGAATCCGTTAGTGAAGTAACCGATAACGCCACGGGAATCTCTGACGATAATATCAAGGCATTGCCTGGGGATAATAAAGCCATCGCGGGCGAAGGCGTTCGTAAAGAGGGCGCGCCGCTGGCGCGGGATGTCGCACCTGCCCGAATGGCCGCAGCCAATACCGGTAAGCCTGAAGATAAAGATCATAAAAAGGTTAAAGATGTTTCTCAGCTTCCGCTGCAACCAACCACTATCGCCGATCTTAGCCAATTAACCGGCGGCGATGAAAAAATGCCTTTAGCGGCGCAATCAAAGCCGATGATGACTATTTTTCCCACTGCCGATGGCGTGAAAGGAGAGGATAGCTCGCTGACTTACCGTTTTCAGCGCTGGGGAAATGACTATTCCGTCAATATTCAGGCGCGGCAAGCAGGGGAGTTTTCGTTAATACCGTCAAATACGCAGGTTGAACATCGTTTGCATGATCAATGGCAAAACGGTAATCCCCAGCGCTGGCACCTGACGCGAGACGATCAACAAAATCCGCAGCAGCAACAGCACAGACAGCAATCTGGCGAGGAGGATGACGCC',
 'ATGCATAAAGAAACGGCTTTACCGTTATTACTGGCGGCATGGCGACATGGCGCGCCAGCGAAATCAGAACATCACAATGGCAACGTTTCTGGTCTGCATCATAACGGAAAAAGCGAACTCAGGATTGCTGAAAAACTGTTGAAAGTCACTGCTGAAAAATCTGTCGGTTTGATCTCTGCGGAGGCCAAAGTAGATAAATCCGCAGCGTTGCTATCGTCTAAAAATAGGCCGTTAGAAAGCGTAAGCGGTAAAAAATTATCTGCTGATTTAAAAGCTGTGGAATCCGTTAGTGAAGTAACCGATAACGCCACGGGAATCTCTGACGATAATATCAAGGCATTGCCTGGGGATAATAAAGCCATCGCGGGCGAAGGCGTTCGTAAAGAGGGCGCGCCGCTGGCGCGGGATGTCGCACCTGCCCGAATGGCCGCAGCCAATACCGGTAAGCCTGAAGATAAAGATCATAAAAAGGTTAAAGATGTTTCTCAGCTTCCGCTGCAACCAACCACTATCGCCGATCTTAGCCAATTAACCGGCGGCGATGAAAAAATGCCTTTAGCGGCGCAATCAAAGCCGATGATGACTATTTTTCCCACTGCCGATGGCGTGAAAGGAGAGGATAGCTCGCTGACTTACCGTTTTCAGCGCTGGGGAAATGACTATTCCGTCAATATTCAGGCGCGGCAAGCAGGGGAGTTTTCGTTAATACCGTCAAATACGCAGGTTGAACATCGTTTGCATGATCAATGGCAAAACGGTAATCCCCAGCGCTGGCACCTGACGCGAGACGATCAACAAAATCCGCAGCAGCAACAGCACAGACAGCAATCTGGCGAGGAGGATGACGCC',
 'ATGGCCGCAGCCAATACCGGTAAGCCTGAAGATAAAGATCATAAAAAGGTTAAAGATGTTTCTCAGCTTCCGCTGCAACCAACCACTATCGCCGATCTTAGCCAATTAACCGGCGGCGATGAAAAAATGCCTTTAGCGGCGCAATCAAAGCCGATGATGACTATTTTTCCCACTGCCGATGGCGTGAAAGGAGAGGATAGCTCGCTGACTTACCGTTTTCAGCGCTGGGGAAATGACTATTCCGTCAATATTCAGGCGCGGCAAGCAGGGGAGTTTTCGTTAATACCGTCAAATACGCAGGTTGAACATCGTTTGCATGATCAATGGCAAAACGGTAATCCCCAGCGCTGGCACCTGACGCGAGACGATCAACAAAATCCGCAGCAGCAACAGCACAGACAGCAATCTGGCGAGGAGGATGACGCC',
 'ATGCCTTTAGCGGCGCAATCAAAGCCGATGATGACTATTTTTCCCACTGCCGATGGCGTGAAAGGAGAGGATAGCTCGCTGACTTACCGTTTTCAGCGCTGGGGAAATGACTATTCCGTCAATATTCAGGCGCGGCAAGCAGGGGAGTTTTCGTTAATACCGTCAAATACGCAGGTTGAACATCGTTTGCATGATCAATGGCAAAACGGTAATCCCCAGCGCTGGCACCTGACGCGAGACGATCAACAAAATCCGCAGCAGCAACAGCACAGACAGCAATCTGGCGAGGAGGATGACGCC',
 'ATGATGACTATTTTTCCCACTGCCGATGGCGTGAAAGGAGAGGATAGCTCGCTGACTTACCGTTTTCAGCGCTGGGGAAATGACTATTCCGTCAATATTCAGGCGCGGCAAGCAGGGGAGTTTTCGTTAATACCGTCAAATACGCAGGTTGAACATCGTTTGCATGATCAATGGCAAAACGGTAATCCCCAGCGCTGGCACCTGACGCGAGACGATCAACAAAATCCGCAGCAGCAACAGCACAGACAGCAATCTGGCGAGGAGGATGACGCC',
 'ATGACTATTTTTCCCACTGCCGATGGCGTGAAAGGAGAGGATAGCTCGCTGACTTACCGTTTTCAGCGCTGGGGAAATGACTATTCCGTCAATATTCAGGCGCGGCAAGCAGGGGAGTTTTCGTTAATACCGTCAAATACGCAGGTTGAACATCGTTTGCATGATCAATGGCAAAACGGTAATCCCCAGCGCTGGCACCTGACGCGAGACGATCAACAAAATCCGCAGCAGCAACAGCACAGACAGCAATCTGGCGAGGAGGATGACGCC',
 'ATGGCCGGGAAGCGACGCTGGAATATCCGACGCGACAGGGAATGTGGGTTCGGT',
 'ATGCAGAAAAACGGTGGTCGGCCTGGATTAAACCTGGGGACTGGCTTGAGCATGTCTCTCCCGCTCTGGCTGGGGCGGCGGTTTCTGCTGGCGCTGAGCACCTGGTCGTTCCCTGGCTTGCTGCAACAGAGCGACCGTTTGAGTTGCCCGTGCCGCATTTGTCCTGTCGGCGTTTATGCG',
 'ATGTCTCTCCCGCTCTGGCTGGGGCGGCGGTTTCTGCTGGCGCTGAGCACCTGGTCGTTCCCTGGCTTGCTGCAACAGAGCGACCGTTTGAGTTGCCCGTGCCGCATTTGTCCTGTCGGCGTTTATGCG',
 'ATGTACTCC',
 'ATGCTGAACTTAACGTTGAAATTATGGCGAATGGTGTTTTGCTGGGTAATGGCGAACTGGTACAGA',
 'ATGGTGTTTTGCTGGGTAATGGCGAACTGGTACAGA',
 'ATGGCGAACTGGTACAGA',
 'ATGACACCT',
 'ATGAATGGC',
 'ATGGGGAATGATATCTCATTAATTGCCTTACTGGCATTTTCCACCCTGTTGCCATTTATTATTGCGTCAGGAACCTGTTTCGTTAAATTTTCTATTGTATTTGTCATGGTGCGTAACGCCCTGGGATTACAGCAGATACCTTCAAATATGACGCTTAACGGCGTCGCATTGCTGCTTTCTATGTTTGTTATGTGGCCCATAATGCATGATGCCTACGTCTATTTTGAGGACGAAGATGTCACCTTTAATGATATTTCATCATTAAGTAAACACGTTGATGAAGGTCTGGATGGTTATCGCGATTATCTGATCAAATATTCAGATCGCGAGTTAGTTCAGTTTTTTGAAAACGCGCAACTGAAGCGTCAGTATGGAGAAGAGACCGAGACGGTAAAGCGTGACAAAGATGAAATTGAAAAACCTTCAATATTTGCGTTATTACCTGCTTATGCGCTGAGCGAAATAAAAAGCGCGTTTAAAATTGGTTTTTATCTTTATTTGCCCTTTGTCGTCGTCGACCTGGTGGTATCCAGCGTGCTACTGGCGCTGGGGATGATGATGATGAGTCCGGTGACGATATCTACACCTATTAAGCTGGTGCTTTTTGTCGCGCTTGATGGCTGGACCTTACTGTCTAAGGGATTGATATTACAGTATATGGACATTGCAACA',
 'ATGGTGCGTAACGCCCTGGGATTACAGCAGATACCTTCAAATATGACGCTTAACGGCGTCGCATTGCTGCTTTCTATGTTTGTTATGTGGCCCATAATGCATGATGCCTACGTCTATTTTGAGGACGAAGATGTCACCTTTAATGATATTTCATCATTAAGTAAACACGTTGATGAAGGTCTGGATGGTTATCGCGATTATCTGATCAAATATTCAGATCGCGAGTTAGTTCAGTTTTTTGAAAACGCGCAACTGAAGCGTCAGTATGGAGAAGAGACCGAGACGGTAAAGCGTGACAAAGATGAAATTGAAAAACCTTCAATATTTGCGTTATTACCTGCTTATGCGCTGAGCGAAATAAAAAGCGCGTTTAAAATTGGTTTTTATCTTTATTTGCCCTTTGTCGTCGTCGACCTGGTGGTATCCAGCGTGCTACTGGCGCTGGGGATGATGATGATGAGTCCGGTGACGATATCTACACCTATTAAGCTGGTGCTTTTTGTCGCGCTTGATGGCTGGACCTTACTGTCTAAGGGATTGATATTACAGTATATGGACATTGCAACA',
 'ATGACGCTTAACGGCGTCGCATTGCTGCTTTCTATGTTTGTTATGTGGCCCATAATGCATGATGCCTACGTCTATTTTGAGGACGAAGATGTCACCTTTAATGATATTTCATCATTAAGTAAACACGTTGATGAAGGTCTGGATGGTTATCGCGATTATCTGATCAAATATTCAGATCGCGAGTTAGTTCAGTTTTTTGAAAACGCGCAACTGAAGCGTCAGTATGGAGAAGAGACCGAGACGGTAAAGCGTGACAAAGATGAAATTGAAAAACCTTCAATATTTGCGTTATTACCTGCTTATGCGCTGAGCGAAATAAAAAGCGCGTTTAAAATTGGTTTTTATCTTTATTTGCCCTTTGTCGTCGTCGACCTGGTGGTATCCAGCGTGCTACTGGCGCTGGGGATGATGATGATGAGTCCGGTGACGATATCTACACCTATTAAGCTGGTGCTTTTTGTCGCGCTTGATGGCTGGACCTTACTGTCTAAGGGATTGATATTACAGTATATGGACATTGCAACA',
 'ATGTTTGTTATGTGGCCCATAATGCATGATGCCTACGTCTATTTTGAGGACGAAGATGTCACCTTTAATGATATTTCATCATTAAGTAAACACGTTGATGAAGGTCTGGATGGTTATCGCGATTATCTGATCAAATATTCAGATCGCGAGTTAGTTCAGTTTTTTGAAAACGCGCAACTGAAGCGTCAGTATGGAGAAGAGACCGAGACGGTAAAGCGTGACAAAGATGAAATTGAAAAACCTTCAATATTTGCGTTATTACCTGCTTATGCGCTGAGCGAAATAAAAAGCGCGTTTAAAATTGGTTTTTATCTTTATTTGCCCTTTGTCGTCGTCGACCTGGTGGTATCCAGCGTGCTACTGGCGCTGGGGATGATGATGATGAGTCCGGTGACGATATCTACACCTATTAAGCTGGTGCTTTTTGTCGCGCTTGATGGCTGGACCTTACTGTCTAAGGGATTGATATTACAGTATATGGACATTGCAACA',
 'ATGTGGCCCATAATGCATGATGCCTACGTCTATTTTGAGGACGAAGATGTCACCTTTAATGATATTTCATCATTAAGTAAACACGTTGATGAAGGTCTGGATGGTTATCGCGATTATCTGATCAAATATTCAGATCGCGAGTTAGTTCAGTTTTTTGAAAACGCGCAACTGAAGCGTCAGTATGGAGAAGAGACCGAGACGGTAAAGCGTGACAAAGATGAAATTGAAAAACCTTCAATATTTGCGTTATTACCTGCTTATGCGCTGAGCGAAATAAAAAGCGCGTTTAAAATTGGTTTTTATCTTTATTTGCCCTTTGTCGTCGTCGACCTGGTGGTATCCAGCGTGCTACTGGCGCTGGGGATGATGATGATGAGTCCGGTGACGATATCTACACCTATTAAGCTGGTGCTTTTTGTCGCGCTTGATGGCTGGACCTTACTGTCTAAGGGATTGATATTACAGTATATGGACATTGCAACA',
 'ATGCATGATGCCTACGTCTATTTTGAGGACGAAGATGTCACCTTTAATGATATTTCATCATTAAGTAAACACGTTGATGAAGGTCTGGATGGTTATCGCGATTATCTGATCAAATATTCAGATCGCGAGTTAGTTCAGTTTTTTGAAAACGCGCAACTGAAGCGTCAGTATGGAGAAGAGACCGAGACGGTAAAGCGTGACAAAGATGAAATTGAAAAACCTTCAATATTTGCGTTATTACCTGCTTATGCGCTGAGCGAAATAAAAAGCGCGTTTAAAATTGGTTTTTATCTTTATTTGCCCTTTGTCGTCGTCGACCTGGTGGTATCCAGCGTGCTACTGGCGCTGGGGATGATGATGATGAGTCCGGTGACGATATCTACACCTATTAAGCTGGTGCTTTTTGTCGCGCTTGATGGCTGGACCTTACTGTCTAAGGGATTGATATTACAGTATATGGACATTGCAACA',
 'ATGATGATGATGAGTCCGGTGACGATATCTACACCTATTAAGCTGGTGCTTTTTGTCGCGCTTGATGGCTGGACCTTACTGTCTAAGGGATTGATATTACAGTATATGGACATTGCAACA',
 'ATGATGATGAGTCCGGTGACGATATCTACACCTATTAAGCTGGTGCTTTTTGTCGCGCTTGATGGCTGGACCTTACTGTCTAAGGGATTGATATTACAGTATATGGACATTGCAACA',
 'ATGATGAGTCCGGTGACGATATCTACACCTATTAAGCTGGTGCTTTTTGTCGCGCTTGATGGCTGGACCTTACTGTCTAAGGGATTGATATTACAGTATATGGACATTGCAACA',
 'ATGAGTCCGGTGACGATATCTACACCTATTAAGCTGGTGCTTTTTGTCGCGCTTGATGGCTGGACCTTACTGTCTAAGGGATTGATATTACAGTATATGGACATTGCAACA',
 'ATGGACATTGCAACA',
 'ATGCTTGTTTTTACTGTCTGGCTGGTATGGCGAAGTTTTACTCTCTTACGGGCGTCAGGTGATATTCCTGGCGTTGGC',
 'ATGGCCGCATGCATTGAACGAGGCGCCGCCGTTTTTATCGGTGGCGATGATCCCGTTAGTTCTGCAAGAAGCGGCGGTAGGCGTCATGCTGGGCTGTCTGCTGTCATGGCCTTTTTGGGTTATGCATGCGCTGGGTTGTATTATCGA',
 'ATGGCCTTTTTGGGTTATGCATGCGCTGGGTTGTATTATCGA',
 'ATGCGATCCGATGAACGAGTGCACGCCTTCATTACCGCCGCTATTAACGTTTAT',
 'ATGGCTCATTTAATGAGTTTATGGGGA',
 'ATGAGTTTATGGGGA',
 'ATGGCAATATTG',
 'ATGCTATTGCGGAATATTTCC',
 'ATGAAACGAATCAGCGCGCACTGGCCGTCCGCGCCTATGCGGAGAAGGTTGGCGTACCTG',
 'ATGCGGAGAAGGTTGGCGTACCTG',
 'ATGATCTGG',
 'ATGAAGTTTTACGTCTTCTGGTTTGGCTGGAAGAGG',
 'ATGTATTCATTGGGCGTTTTT',
 'ATGTTCACTAACCACCGTCGGGGTTTAATAACTGCATCAGATAAACGCAGTCGT',
 'ATGTCGCACCGCCGTCTTACGCTTCACGCGTTGGCGTCCGTGAACCGC',
 'ATGTCTTTGTTATCGGTCTTATCGGTGAACGAGGCCGTGAGGTCACTGAATTCGTGGATATGTTGCGCGCTTCGCATAAGAAAGAAAAATGCGTGCTGGTTTTTGCCACTTCCGATTTCCCCTCGGTCGATCGCTGCAATGCGGCGCAACTGGCGACAACCG',
 'ATGCGGCGCAACTGGCGACAACCG',
 'ATGCGCGTGCTTTGCGAGACGTGGCACTGGCGTCGGGAGAGCGTCCGGCTCGTCGAGGTTATCCCGCCTCCGTATTCGATAATTTGCCCCGCTTGCTGGAACGCCCAGGGGCGACCAGCGAGGGAAGCATTACTGCCTTTTATACGGTACTGCTGGAAAGCGAGGAAGAGGCGGACCCGATGGCGGATGAAATTCGCTCTATCCTTGACGGTCACCTGTATC',
 'ATGAAATTCGCTCTATCCTTGACGGTCACCTGTATC',
 'ATGTAC',
 'ATGCTGAACAGGCATCTGCCGTGCGTAAAT',
 'ATGACACGT',
 'ATGCATTCGCTGACCAGAATTAAAGTATTGCAGCGGCGCTGTACGGTATTTCATTCACAGTGTGAGTCGATATTACTTCGCTATCAGGATGAGGACCGCGGGCTGCAGGCCGAGGAGGAGGCGATCCTTGAACAAATAGCGGGTCTGAAATTGTTATTAGATACGCTGCGTGCAGAAAACAGACAGCTCAGTCGTGAGGAAATTTATACGTTATTACGTAAGCAGTCTATTGTTCGCCGGCAGATAAAAGATTTAGAACTCCAGATTATACAAATTCAGGAAAAACGGAGCGAGCTGGAAAAGAAAAGGGAAGAGTTTCAGAAAAAAAGTAAATATTGGTTGCGCAAAGAAGGGAACTATCAACGCTGGATAATCCGTCAGAAAAGATTCTATATCCAGCGAGAGATACAGCAGGAAGAGGCCGAGTCAGAGGAGATAATT',
 'ATGTGTCAGCTGTCAGTTCATCCGGGAACATTTTACTGCCGCAGCAGGATGAGGTTGGCGGTTTATCAGAAGCAT',
 'ATGAGGTTGGCGGTTTATCAGAAGCAT',
 'ATGGCGATGCTTTCG',
 'ATGCTTTCG',
 'ATGGCGCGCCAGCGAAATCAGAACATCACAATGGCAACGTTTCTGGTCTGCATCATAACGGAAAAAGCGAACTCAGGATTGCTGAAAAACTGT',
 'ATGGCAACGTTTCTGGTCTGCATCATAACGGAAAAAGCGAACTCAGGATTGCTGAAAAACTGT',
 'ATGTCGCACCTGCCCGAATGGCCGCAGCCAATACCGGTAAGCCTGAAGATAAAGATCATAAAAAGGTTAAAGATGTTTCTCAGCTTCCGCTGCAACCAACCACTATCGCCGATCTTAGCCAAT',
 'ATGTTTCTCAGCTTCCGCTGCAACCAACCACTATCGCCGATCTTAGCCAAT',
 'ATGAAAAAATGCCTT',
 'ATGGCG',
 'ATGACTATTCCGTCAATATTCAGGCGCGGCAAGCAGGGGAGTTTTCGT',
 'ATGATCAATGGCAAAACGGTAATCCCCAGCGCTGGCACC',
 'ATGACGCCTGATGTCATTGCGTGTGAGACAGAT',
 'ATGGCTATTGGCGCAAACCGCGACAGAATGCCAGCGCCATGGCCGGGAAGCGACGCTGGAATATCCGACGCGACAGGGAATGTGGGTTCGGTTGAGCGATGCAGAAAAACGGTGGTCGGCCTGGAT',
 'ATGCCAGCGCCATGGCCGGGAAGCGACGCTGGAATATCCGACGCGACAGGGAATGTGGGTTCGGTTGAGCGATGCAGAAAAACGGTGGTCGGCCTGGAT',
 'ATGCGTAGAGAACCCCGTACCGGGAAGCGCGCTGCCGGAAGGGAAATTGTTGCACATTATGAGCGATCGGGGCGGCCTGTGGTT',
 'ATGGCTGAGCGAGTCTGG',
 'ATGATATCTCAT',
 'ATGATGCCTACGTCTATTTTGAGGACGAAGATGTCACCTTTAATGATATTTCATCAT',
 'ATGCCTACGTCTATTTTGAGGACGAAGATGTCACCTTTAATGATATTTCATCAT',
 'ATGTCACCTTTAATGATATTTCATCAT',
 'ATGATATTTCATCAT',
 'ATGAAGGTCTGGATGGTTATCGCGATTATC',
 'ATGGTTATCGCGATTATC',
 'ATGGAGAAGAGACCGAGACGG',
 'ATGAAATTGAAAAACCTTCAATATTTGCGTTATTACCTGCTTATGCGC',
 'ATGCGC',
 'ATGGCTGGACCTTACTGTCTAAGGGAT',
 'ATGGATGATTTAGTGTTTGCAGGTAATAAGGCGCTCTATCTTGTTTTGATCCTGTCAGGGTGGCCGACGATTGTCGCAACGATTATCGGCCTCCTGGTAGGGTTATTCCAGACGGTAACGCAATTACAGGAACAGACGCTGCCTTTTGGCATTAAATTACTTGGCGTGTGTTTATGCTTGTTTTTACTGTCTGGCTGGTATGGCGAAGTTTTACTCTCTTACGGGCGTCAGGTGATATTCCTGGCGTTGGCTAAGGGG',
 'ATGTTTTACGCGTTGTACTTTGAAATTCATCACCTGGTTGCGTCTGCGGCGCTAGGGTTTGCTCGCGTGGCGCCGATTTTTTTCTTCCTGCCGTTTTTGAATAGCGGGGTATTAAGCGGTGCGCCGAGAAACGCCATTATCATCCTGGTGGCATTGGGAGTATGGCCGCATGCATTGAACGAGGCGCCGCCGTTTTTATCGGTGGCGATGATCCCGTTAGTTCTGCAAGAAGCGGCGGTAGGCGTCATGCTGGGCTGTCTGCTGTCATGGCCTTTTTGGGTTATGCATGCGCTGGGTTGTATTATCGATAACCAGCGAGGGGCAACGCTAAGTAGTAGTATCGATCCGGCAAACGGTATTGATACCTCGGAAATGGCTAATTTCCTGAATATGTTTGCCGCTGTCGTTTATTTACAAAACGGCGGTCTGGTCACGATGGTTGACGTGTTAAATAAAAGCTATCAGCTATGCGATCCGATGAACGAGTGCACGCCTTCATTACCGCCGCTATTAACGTTTATTAATCAGGTGGCTCAAAACGCCTTGGTTCTGGCCAGTCCGGTGGTATTAGTGCTGTTGCTGTCAGAAGTATTCCTGGGTTTATTGTCGCGCTTTGCTCCGCAAATGAACGCTTTTGCGATTTCACTGACGGTAAAAAGCGGTATTGCCGTTTTAATTATGCTGCTTTATTTCTCTCCGGTACTACCGGACAATGTACTGCGACTCTCTTTCCAGGCCACAGGGTTAAGCAGTTGGTTTTACGAGCGAGGGGCGACGCATGTCCTCGAA',
 'ATGATCCCGTTAGTTCTGCAAGAAGCGGCGGTAGGCGTCATGCTGGGCTGTCTGCTGTCATGGCCTTTTTGGGTTATGCATGCGCTGGGTTGTATTATCGATAACCAGCGAGGGGCAACGCTAAGTAGTAGTATCGATCCGGCAAACGGTATTGATACCTCGGAAATGGCTAATTTCCTGAATATGTTTGCCGCTGTCGTTTATTTACAAAACGGCGGTCTGGTCACGATGGTTGACGTGTTAAATAAAAGCTATCAGCTATGCGATCCGATGAACGAGTGCACGCCTTCATTACCGCCGCTATTAACGTTTATTAATCAGGTGGCTCAAAACGCCTTGGTTCTGGCCAGTCCGGTGGTATTAGTGCTGTTGCTGTCAGAAGTATTCCTGGGTTTATTGTCGCGCTTTGCTCCGCAAATGAACGCTTTTGCGATTTCACTGACGGTAAAAAGCGGTATTGCCGTTTTAATTATGCTGCTTTATTTCTCTCCGGTACTACCGGACAATGTACTGCGACTCTCTTTCCAGGCCACAGGGTTAAGCAGTTGGTTTTACGAGCGAGGGGCGACGCATGTCCTCGAA',
 'ATGCTGGGCTGTCTGCTGTCATGGCCTTTTTGGGTTATGCATGCGCTGGGTTGTATTATCGATAACCAGCGAGGGGCAACGCTAAGTAGTAGTATCGATCCGGCAAACGGTATTGATACCTCGGAAATGGCTAATTTCCTGAATATGTTTGCCGCTGTCGTTTATTTACAAAACGGCGGTCTGGTCACGATGGTTGACGTGTTAAATAAAAGCTATCAGCTATGCGATCCGATGAACGAGTGCACGCCTTCATTACCGCCGCTATTAACGTTTATTAATCAGGTGGCTCAAAACGCCTTGGTTCTGGCCAGTCCGGTGGTATTAGTGCTGTTGCTGTCAGAAGTATTCCTGGGTTTATTGTCGCGCTTTGCTCCGCAAATGAACGCTTTTGCGATTTCACTGACGGTAAAAAGCGGTATTGCCGTTTTAATTATGCTGCTTTATTTCTCTCCGGTACTACCGGACAATGTACTGCGACTCTCTTTCCAGGCCACAGGGTTAAGCAGTTGGTTTTACGAGCGAGGGGCGACGCATGTCCTCGAA',
 'ATGCATGCGCTGGGTTGTATTATCGATAACCAGCGAGGGGCAACGCTAAGTAGTAGTATCGATCCGGCAAACGGTATTGATACCTCGGAAATGGCTAATTTCCTGAATATGTTTGCCGCTGTCGTTTATTTACAAAACGGCGGTCTGGTCACGATGGTTGACGTGTTAAATAAAAGCTATCAGCTATGCGATCCGATGAACGAGTGCACGCCTTCATTACCGCCGCTATTAACGTTTATTAATCAGGTGGCTCAAAACGCCTTGGTTCTGGCCAGTCCGGTGGTATTAGTGCTGTTGCTGTCAGAAGTATTCCTGGGTTTATTGTCGCGCTTTGCTCCGCAAATGAACGCTTTTGCGATTTCACTGACGGTAAAAAGCGGTATTGCCGTTTTAATTATGCTGCTTTATTTCTCTCCGGTACTACCGGACAATGTACTGCGACTCTCTTTCCAGGCCACAGGGTTAAGCAGTTGGTTTTACGAGCGAGGGGCGACGCATGTCCTCGAA',
 'ATGGCTAATTTCCTGAATATGTTTGCCGCTGTCGTTTATTTACAAAACGGCGGTCTGGTCACGATGGTTGACGTGTTAAATAAAAGCTATCAGCTATGCGATCCGATGAACGAGTGCACGCCTTCATTACCGCCGCTATTAACGTTTATTAATCAGGTGGCTCAAAACGCCTTGGTTCTGGCCAGTCCGGTGGTATTAGTGCTGTTGCTGTCAGAAGTATTCCTGGGTTTATTGTCGCGCTTTGCTCCGCAAATGAACGCTTTTGCGATTTCACTGACGGTAAAAAGCGGTATTGCCGTTTTAATTATGCTGCTTTATTTCTCTCCGGTACTACCGGACAATGTACTGCGACTCTCTTTCCAGGCCACAGGGTTAAGCAGTTGGTTTTACGAGCGAGGGGCGACGCATGTCCTCGAA',
 'ATGTTTGCCGCTGTCGTTTATTTACAAAACGGCGGTCTGGTCACGATGGTTGACGTGTTAAATAAAAGCTATCAGCTATGCGATCCGATGAACGAGTGCACGCCTTCATTACCGCCGCTATTAACGTTTATTAATCAGGTGGCTCAAAACGCCTTGGTTCTGGCCAGTCCGGTGGTATTAGTGCTGTTGCTGTCAGAAGTATTCCTGGGTTTATTGTCGCGCTTTGCTCCGCAAATGAACGCTTTTGCGATTTCACTGACGGTAAAAAGCGGTATTGCCGTTTTAATTATGCTGCTTTATTTCTCTCCGGTACTACCGGACAATGTACTGCGACTCTCTTTCCAGGCCACAGGGTTAAGCAGTTGGTTTTACGAGCGAGGGGCGACGCATGTCCTCGAA',
 'ATGGTTGACGTGTTAAATAAAAGCTATCAGCTATGCGATCCGATGAACGAGTGCACGCCTTCATTACCGCCGCTATTAACGTTTATTAATCAGGTGGCTCAAAACGCCTTGGTTCTGGCCAGTCCGGTGGTATTAGTGCTGTTGCTGTCAGAAGTATTCCTGGGTTTATTGTCGCGCTTTGCTCCGCAAATGAACGCTTTTGCGATTTCACTGACGGTAAAAAGCGGTATTGCCGTTTTAATTATGCTGCTTTATTTCTCTCCGGTACTACCGGACAATGTACTGCGACTCTCTTTCCAGGCCACAGGGTTAAGCAGTTGGTTTTACGAGCGAGGGGCGACGCATGTCCTCGAA',
 'ATGAACGAGTGCACGCCTTCATTACCGCCGCTATTAACGTTTATTAATCAGGTGGCTCAAAACGCCTTGGTTCTGGCCAGTCCGGTGGTATTAGTGCTGTTGCTGTCAGAAGTATTCCTGGGTTTATTGTCGCGCTTTGCTCCGCAAATGAACGCTTTTGCGATTTCACTGACGGTAAAAAGCGGTATTGCCGTTTTAATTATGCTGCTTTATTTCTCTCCGGTACTACCGGACAATGTACTGCGACTCTCTTTCCAGGCCACAGGGTTAAGCAGTTGGTTTTACGAGCGAGGGGCGACGCATGTCCTCGAA',
 'ATGAACGCTTTTGCGATTTCACTGACGGTAAAAAGCGGTATTGCCGTTTTAATTATGCTGCTTTATTTCTCTCCGGTACTACCGGACAATGTACTGCGACTCTCTTTCCAGGCCACAGGGTTAAGCAGTTGGTTTTACGAGCGAGGGGCGACGCATGTCCTCGAA',
 'ATGCTGCTTTATTTCTCTCCGGTACTACCGGACAATGTACTGCGACTCTCTTTCCAGGCCACAGGGTTAAGCAGTTGGTTTTACGAGCGAGGGGCGACGCATGTCCTCGAA',
 'ATGGATTATCAAAATAATGTCAGCGAAGAACGTGTTGCGGAAATGATTTGGGATGCCGTTAGTGAAGGCGCCACGCTAAAAGACGTTCATGGGATCC',
 'ATGATTTGGGATGCCGTTAGTGAAGGCGCCACGCTAAAAGACGTTCATGGGATCC',
 'ATGCCGTACCTCGTTTTCTTGTGGCTGAATAACGTCTTTGCCCGCGTTTTCTACCTCTTCCAGCCAAACCAGAAGACG',
 'ATGGGTTTTGAAAAGACTGCGCGCCAGTTTGATATCGACGATCACAGGTACGCCAACCTTCTCCGCATAGGCGCGGACGGCCAGTGCGCGCTGATTCGTTTCATACACCGAGATCATCGGAATCGGCATCAATTCGGGTTTAAAATAAATCCCGATCGTAATATGCGTGGGGTTGGCAACAATCAGGCG',
 'ATGCGTGGGGTTGGCAACAATCAGGCG',
 'ATGAACTTCACGTCTTTTAGATTTAACCTCTGGGTTCCCTTCCTGCTCCTTCATTTCACGCTTCACTTCTTCCTTATCCATTTTCATATCTTTCATGGTCAGGAAATATTCCGCAATAGCATCCAA',
 'ATGAAGGCGTGCACTCGTTCATCGGATCGCATAGCTGATAGCTTTTATTTAACACGTCAACCATCG',
 'ATGACAGCAGACAGCCCAGCA',
 'ATGCATGCGGCCATACTCCCAATGCCACCAGGA',
 'ATGCCACCAGGA',
 'ATGTCCATATACTGTAATATCAATCCCTTAGACAGTAAGGTCCAGCCATCAAGCGCGACAAAAAGCACCAGCTTAATAGGTGTAGATATCGTCACCGGACTCATCATCATCATCCCCAGCGCCAGTAGCACGCTGGATACCACCAGGTCGACGACGACAAAGGGCAAA',
 'ATGGGCCACATAACAAACATAGAAAGCAGCAATGCGACGCCGTTAAGCGTCATATTTGAAGGTATCTGCTGTAATCCCAGGGCGTTACGCACCATGACAAATACAATAGAAAATTTAACGAAACAGGTTCCTGACGCAATAATAAATGGCAACAGGGTGGAAAATGCCAGTAAGGCAATTAATGAGATATCATTCCCCATTACCAGACTCGCTCAGCCATTCATGGATCTCAACGCC',
 'ATGACAAATACAATAGAAAATTTAACGAAACAGGTTCCTGACGCAATAATAAATGGCAACAGGGTGGAAAATGCCAGTAAGGCAATTAATGAGATATCATTCCCCATTACCAGACTCGCTCAGCCATTCATGGATCTCAACGCC',
 'ATGGATCTCAACGCC',
 'ATGTTGAATATC',
 'ATGACC',
 'ATGCTCAAACCACAGGCCGCCCCGATCGCTCATAATGTGCAACAATTTCCCTTCCGGCAGCGCGCTTCCCGGTACGGGGTTCTCTACGCA',
 'ATGCGGCACGGGCAACTCAAACGGTCGCTCTGTTGCAGCAAGCCAGGGAACGACCAGGTGCTCAGCGCCAGCAGAAACCGCCGCCCCAGCCAGAGCGGGAGAGACATGCTCAAGCCAGTCCCCAGGTTTAATCCAGGCCGACCACCGTTTTTCTGCATCGCTCAACCGAACCCACATTCCCTGTCGCGTCGGATATTCCAGCGTCGCTTCCCGGCCATGGCGCTGGCATTCTGTCGCGGTTTGCGCCAA',
 'ATGCTCAAGCCAGTCCCCAGGTTTAATCCAGGCCGACCACCGTTTTTCTGCATCGCTCAACCGAACCCACATTCCCTGTCGCGTCGGATATTCCAGCGTCGCTTCCCGGCCATGGCGCTGGCATTCTGTCGCGGTTTGCGCCAA',
 'ATGGCGCTGGCATTCTGTCGCGGTTTGCGCCAA',
 'ATGGCTTTATTATCCCCAGGCAATGCCTTGATATTATCGTCAGAGATTCCCGTGGCGTTATCGGTTACTTCACTAACGGATTCCACAGCTTTTAAATCAGCAGATAATTTTTTACCGCTTACGCTTTCTAACGGCCTATTTTTAGACGATAGCAACGCTGCGGATTTATCTACTTTGGCCTCCGCAGAGATCAAACCGACAGATTTTTCAGCAGTGACTTTCAACAGTTTTTCAGCAATCCTGAGTTCGCTTTTTCCGTTA',
 'ATGTTCCCGGATGAACTGACAGCTGACACATCGCCCATTAAATTATCTCCTCTGACTCGGCCTCTTCCTGCTGTATCTCTCGCTGGATATAGAATCTTTTCTGACGGATTATCCAGCGTTGATAGTTCCCTTCTTTGCGCAACCAATATTTACTTTTTTTCTGAAACTCTTCCCTTTTCTTTTCCAGCTCGCTCCGTTTTTCCTGAATTTGTA',
 'ATGAAAAGCTGGAGCTCTTCCAAACGCGTCATTAATTTACGCACGGCAGATGCCTGTTCAGCATGTGTCGGCGTCGTGACTTGTCCAAAAACGCGGCTTACGCTTTTCAGTACATCGATTGCCGGG',
 'ATGCTTCCCTCGCTGGTCGCCCCTGGGCGTTCCAGCAAGCGGGGCAAATTATCGAATACGGAGGCGGGA',
 'ATGGAATCGATAAAAAGCACGACCCGTTTTCCCTGGTCGCGAAAATATTCCGCTACGGTTGTCGCCAGTTGCGCCGCATTGCAGCGATCGACCGAGGGGAAATCGGAAGTGGCAAAAACCAGCACGCATTTTTCTTTCTTATGCGAAGCGCGCAACATATCCACGAATTCAGTGACCTCACGGCCTCGTTCACCGATAAGACCGATAACAAAGACATCCGCCTCCGTTTGCTCGATCAGCATATGCATCAGCATGGTCTTACCGCATCCTGCGGAGGCAAAAATGCCCATTCGCTGGCCTACGCCACAGGTCAATAACCCGTCAATCGCGCGCACACCGGTAATCAGCGGTTCACGGACGCCAACGCGTGAAGCG',
 'ATGGTCTTACCGCATCCTGCGGAGGCAAAAATGCCCATTCGCTGGCCTACGCCACAGGTCAATAACCCGTCAATCGCGCGCACACCGGTAATCAGCGGTTCACGGACGCCAACGCGTGAAGCG',
 'ATGCCCATTCGCTGGCCTACGCCACAGGTCAATAACCCGTCAATCGCGCGCACACCGGTAATCAGCGGTTCACGGACGCCAACGCGTGAAGCG',
 'ATGAACGTCTTT',
 'ATGCAGTTATTAAACCCCGACGGTGGT',
 'ATGAATACATCGCTACTGCCTTACGCGGCTCAATGCCGTACCTCGTTTTCTTGTGGC',
 'ATGGTCAGGAAATATTCCGCAATAGCATCCAATAATAAGACAATCAATGCGCAAGCAAGGCAAGTTAATACCAATGCGAGGAGAAGTTCACGCCAAATGACGGCAATACCTACAATATTGCCATTTAGC',
 'ATGACGGCAATACCTACAATATTGCCATTTAGC',
 'ATGATGGCGGCCACCACAAAGGATGAGAGATACAGTAGGGTTTTGACCGTATCTTTAACCGTGCGCATACTAAAAAGTTTTTTTGCCCCTTCTACCGGGTTTAACGCCGATAAATTAGGCTTTAATGCTTCTGTCGCCAGCACAAAACCGGCCTGTAATAACGCCGGTAATGCGGAACACACTAAGCAGAGCAGCATAAATGGAATCAGATATTTTAACCCTATCCCAAAAACGGCCAAACTG',
 'ATGGCGGCCACCACAAAGGATGAGAGATACAGTAGGGTTTTGACCGTATCTTTAACCGTGCGCATACTAAAAAGTTTTTTTGCCCCTTCTACCGGGTTTAACGCCGATAAATTAGGCTTTAATGCTTCTGTCGCCAGCACAAAACCGGCCTGTAATAACGCCGGTAATGCGGAACACACTAAGCAGAGCAGCATAAATGGAATCAGATATTTTAACCCTATCCCAAAAACGGCCAAACTG',
 'ATGCTC',
 'ATGATCTTAATTATCCCCATAAACTCATTAAATGAGCCATACGACACCAGA',
 'ATGAGATCTTTACTTTTAAATGACTGGCCTTTTTTAGCGGAGTCTTCCAGCCGTTTTTTAGTCGGTTTTTCTGTTTTATTCGAGGACATGCGTCGCCCCTCGCTCGTAAAACCAACTGCT',
 'ATGCGTCGCCCCTCGCTCGTAAAACCAACTGCT',
 'ATGCATAACCCAAAAAGGCCA',
 'ATGCGGCCATACTCCCAATGCCACCAGGATGATAATGGCGTTTCTCGGCGCACCGCT',
 'ATGAATTTCAAAGTACAACGCGTAAAACATTTTTTACCCCTTAGCCAACGCCAGGAATATCACCTGACGCCCGTAAGAGAG',
 'ATGTTGCAATGTCCATATACTGTAATATCAATCCCT',
 'ATGATGAAATATCAT',
 'ATGAAATATCAT',
 'ATGCGACGCCGT',
 'ATGGCAACAGGGTGGAAAATGCCAGTAAGGCAATTAATGAGATATCATTCCCCATTACCAGACTCGCTCAGCCATTCATGGATCTCAACGCCTAAGGTGTCATTCATCTGTACCAGTTCGCCATTACCCAGCAAAACACCATTCGCCATAATTTCAACGTTAAGTTCAGCATTGGTCGGCAGTGATAATAGCTGTTGCTGCCCCATGGCTTCGAGTTCGGCGAGGGTAACGTTCTTACGATACAAAACAAATTCCAGTTTGACGGGCAAT',
 'ATGCCAGTAAGGCAATTAATGAGATATCATTCCCCATTACCAGACTCGCTCAGCCATTCATGGATCTCAACGCCTAAGGTGTCATTCATCTGTACCAGTTCGCCATTACCCAGCAAAACACCATTCGCCATAATTTCAACGTTAAGTTCAGCATTGGTCGGCAGTGATAATAGCTGTTGCTGCCCCATGGCTTCGAGTTCGGCGAGGGTAACGTTCTTACGATACAAAACAAATTCCAGTTTGACGGGCAAT',
 'ATGAGATATCATTCCCCATTACCAGACTCGCTCAGCCATTCATGGATCTCAACGCCTAAGGTGTCATTCATCTGTACCAGTTCGCCATTACCCAGCAAAACACCATTCGCCATAATTTCAACGTTAAGTTCAGCATTGGTCGGCAGTGATAATAGCTGTTGCTGCCCCATGGCTTCGAGTTCGGCGAGGGTAACGTTCTTACGATACAAAACAAATTCCAGTTTGACGGGCAAT',
 'ATGGCTTCGAGTTCGGCGAGGGTAACGTTCTTACGATACAAAACAAATTCCAGTTTGACGGGCAAT',
 'ATGTGCAACAATTTCCCTTCCGGCAGCGCGCTTCCCGGTACGGGGTTCTCTACGCATAAACGCCGACAGGACAAATGCGGCACGGGCAACTCAAACGGTCGCTCTGTTGCAGCAAGCCAGGGAACGACCAGGTGCTCAGCGCCAGCAGAAACCGCCGCCCCAGCCAGAGCGGGAGAGACATGCTCAAGCCAGTCCCCAGGTTTAATCCAGGCCGACCACCGTTTTTCTGCATCGCTCAACCGAACCCACATTCCCTGTCGCGTCGGATATTCCAGCGTCGCTTCCCGGCCATGGCGCTGGCATTCTGTCGCGGTTTGCGCCAATAGCCATTCGCGACGATCAATCTGTCTCACACGCAATGACATCAGGCGTCATCCTCCTCGCCAGATTGCTGTCTGTGCTGTTGCTGCTGCGGATTTTGTTGATCGTCTCGCGTCAGGTGCCAGCGCTGGGGATTACCGTTTTGCCATTGATCATGCAAACGATGTTCAACCTGCGTATTTGACGGTATTAACGAAAACTCCCCTGCTTGCCGCGCCTGAATAT',
 'ATGCCT',
 'ATGCCGCCAGTAATAACGGTAAAGCCGTTTCTTTATGCATTACGAAAGCATCGCCATAGTCGCGATCTTTTTTATCACCGGAATATTCTGTCTTATGTTTTTCCACCGCTTTTTTTAATGCTTCTGATAAACCGCCAACCTCATCCTGCTGCGGCAGTAAAATGTTCCCGGATGAAC',
 'ATGCTTCTGATAAACCGCCAACCTCATCCTGCTGCGGCAGTAAAATGTTCCCGGATGAAC',
 'ATGAAC',
 'ATGAAATACCGTACAGCGCCGCTGCAATACTTTAATTCTGGTCAGCGAATGCATTCATACCGCTCAACGTGTCATCAAAGGATGAATACTGCGCTACCGGCTGGCATAACCAGGCTTTCAGGCTATCCCGCATCTGCATCGCCCGATCGTTATCGATATTTTCGCCAGGACGATATTCTCCCAAGTCAA',
 'ATGCATTCATACCGCTCAACGTGTCATCAAAGGATGAATACTGCGCTACCGGCTGGCATAACCAGGCTTTCAGGCTATCCCGCATCTGCATCGCCCGATCGTTATCGATATTTTCGCCAGGACGATATTCTCCCAAGTCAA',
 'ATGAATACTGCGCTACCGGCTGGCATAACCAGGCTTTCAGGCTATCCCGCATCTGCATCGCCCGATCGTTATCGATATTTTCGCCAGGACGATATTCTCCCAAGTCAA',
 'ATGCCTGTTCAGCATGTGTCGGCGTCG',
 'ATGCGCAAGCAAGGCAAGTTAATACCAATGCGAGGAGAAGTTCACGCCAAA',
 'ATGCGAGGAGAAGTTCACGCCAAA',
 'ATGAGAGATACAGTAGGGTTT',
 'ATGCTTCTGTCGCCAGCACAAAACCGGCCTGTAATAACGCCGGTAATGCGGAACACACTAAGCAGAGCAGCA',
 'ATGCGGAACACACTAAGCAGAGCAGCA',
 'ATGGAATCAGATATTTTAACCCTATCCCAAAAACGGCCAAACTGTAGTCAGCCATGCTCTGATCAAAATTATCCGCAA',
 'ATGAGCCATACGACACCAGATAGGCAATTCCTCCCAGCGTCAGGCAGGCGA',
 'ATGACTGGCCTTTTT',
 'ATGACGCCTACCGCCGCTTCTTGCAGAACTAACGGGATCATCGCCACCGATAAAAACGGCGGCGCCTCGTTCAATGCATGCGGCCATACTCCCAATGCCACCAGGATGATAATGGCGTTTCTCGGCGCACCGCTTAATACCCCGCTATTCAAAAACGGCAGGAAGAAAAAAATCGGCGCCACGCGAGCAAACCCTAGCGCCGCAGACGCAACCAGG',
 'ATGATAATGGCGTTTCTCGGCGCACCGCTTAATACCCCGCTATTCAAAAACGGCAGGAAGAAAAAAATCGGCGCCACGCGAGCAAACCCTAGCGCCGCAGACGCAACCAGG',
 'ATGGCGTTTCTCGGCGCACCGCTTAATACCCCGCTATTCAAAAACGGCAGGAAGAAAAAAATCGGCGCCACGCGAGCAAACCCTAGCGCCGCAGACGCAACCAGG',
 'ATGCCAAAAGGCAGCGTCTGTTCCTGTAATTGCGTTACCGTCTGGAATAACCCTACCAGGAGGCCGATAATCGTTGCGACAATCGTCGGCCACCCTGACAGGATCAAAACAAGA',
 'ATGATGTCATGTTGCAATGTCCATATACTG',
 'ATGTCATGTTGCAATGTCCATATACTG',
 'ATGCATTATGGGCCACATAACAAACATAGAAAGCAGCAATGCGACGCCGTTAAGCGTCATATT',
 'ATGACATCAGGCGTCATCCTCCTCGCCAGATTGCTGTCTGTGCTGTTGCTGCTGCGGATTTTGTTGATCGTCTCGCGTCAGGTGCCAGCGCTGGGGATTACCGTTTTGCCATTGATCATGCAAACGATGTTCAACCTGCGTATT',
 'ATGCAAACGATGTTCAACCTGCGTATT',
 'ATGTTCAACCTGCGTATT',
 'ATGATCTTTATCTTCAGGCTTACCGGTATTGGCTGCGGCCATTCGGGCAGGTGCGACATCCCGCGCCAGCGGCGCGCCCTCTTTACGAACGCCTTCGCCCGCGATGGCTTTATTATCCCCAGGCAATGCCTTGATATTATCGTCAGAGATTCCCGTGGCGTTATCGGTTACTTCACTAACGGATTCCACAGCTTT',
 'ATGATGCAGACCAGAAACGTTGCCATTGTGATGTTC',
 'ATGCAGACCAGAAACGTTGCCATTGTGATGTTC',
 'ATGTTC',
 'ATGTCGCCATGCCGCCAG',
 'ATGCATTACGAAAGCATCGCCATAGTCGCGATCTTTTTTATCACCGGAATATTCTGTCTTATGTTTTTCCACCGCTTTTTT',
 'ATGTTTTTCCACCGCTTTTTT',
 'ATGTGTCGGCGTCGTGACTTGTCCAAAAACGCGGCTTACGCTTTTCAGTACATCGATTGCCGGGTAATGTCCCTGCCCGGCCAGCTTTCTGCTCAGATACAGGTGACCGTCAAGGATAGAGCGAATTTCATCCGCCATCGGGTCCGCCTCTTCCTCGCTTTCCAGCAGTACCGTATAAAAGGCAGTAATGCTTCCCTCGCTGGTCGCCCCTGGGCGTTCCAGCAAGCGGGGCAAATTATCGAATACGGAGGCGGGATAACCTCGACGAGCCGGACGCTCTCCCGACGCCAGTGCCACGTCTCGCAAAGCACGCGCATAACGGGTCATGGAATCGATAAAAAGCACGACCCGTTTTCCCTGGTCGCGAAAATATTCCGCTACGGTTGTCGCCAGTTGCGCCGCATTGCAGCGATCGACCGAGGGGAAATCGGAAGTGGCAAAAACCAGCACGCATTTTTCTTTCTTATGCGAAGCGCGCAACATATCCACGAATTCAGTGACCTCACGGCCTCGTTCACCGATAAGACCGATAACAAAGACATCCGCCTCCGTTTGCTCGATCAGCATATGCATCAGCATGGTCTTACCGCATCCTGCGGAGGCAAAAATGCCCATTCGCTGGCCTACGCCACAGGTCAA',
 'ATGTCCCTGCCCGGCCAGCTTTCTGCTCAGATACAGGTGACCGTCAAGGATAGAGCGAATTTCATCCGCCATCGGGTCCGCCTCTTCCTCGCTTTCCAGCAGTACCGTATAAAAGGCAGTAATGCTTCCCTCGCTGGTCGCCCCTGGGCGTTCCAGCAAGCGGGGCAAATTATCGAATACGGAGGCGGGATAACCTCGACGAGCCGGACGCTCTCCCGACGCCAGTGCCACGTCTCGCAAAGCACGCGCATAACGGGTCATGGAATCGATAAAAAGCACGACCCGTTTTCCCTGGTCGCGAAAATATTCCGCTACGGTTGTCGCCAGTTGCGCCGCATTGCAGCGATCGACCGAGGGGAAATCGGAAGTGGCAAAAACCAGCACGCATTTTTCTTTCTTATGCGAAGCGCGCAACATATCCACGAATTCAGTGACCTCACGGCCTCGTTCACCGATAAGACCGATAACAAAGACATCCGCCTCCGTTTGCTCGATCAGCATATGCATCAGCATGGTCTTACCGCATCCTGCGGAGGCAAAAATGCCCATTCGCTGGCCTACGCCACAGGTCAA',
 'ATGCGAAGCGCGCAACATATCCACGAATTCAGTGACCTCACGGCCTCGTTCACCGATAAGACCGATAACAAAGACATCCGCCTCCGTTTGCTCGATCAGCATATGCATCAGCATGGTCTTACCGCATCCTGCGGAGGCAAAAATGCCCATTCGCTGGCCTACGCCACAGGTCAA',
 'ATGCATCAGCATGGTCTTACCGCATCCTGCGGAGGCAAAAATGCCCATTCGCTGGCCTACGCCACAGGTCAA']
def longest_ORF(dna):
    """ Finds the longest ORF on both strands of the specified DNA and returns it
        as a string
    >>> longest_ORF("ATGCGAATGTAGCATCAAA")
    'ATGCTACATTCGCAT'
    """
    #dna='ATGGCTAGCGATGCGAGCCCTACCGTGACCGATCCATGAGAGATGCTCTAGGCTATGAATGACGTAGCG'
    ourorf2=find_all_ORFs_both_strands(dna)
    thelength=[]
    for i in ourorf2:
        thelength.append(len(i))
    seqlen=dict((j,i) for j,i in zip(thelength,ourorf2))
    orderedlength=sorted(thelength)
    return seqlen[thelength[-1]]
#longest_ORF(dna)
 longest_ORF(load_seq('/home/eanbit2/mscbioinfo/introtoprog/python/mini-project-eunicenjuguna/data/X73525.fa'))
'ATGCATCAGCATGGTCTTACCGCATCCTGCGGAGGCAAAAATGCCCATTCGCTGGCCTACGCCACAGGTCAA'
import random
def shuffle_string(s):
    """Shuffles the characters in the input string
        NOTE: this is a helper function, you do not
        have to modify th|s in any way """
    return ''.join(random.sample(s, len(s)))
num_trials=1000
def longest_ORF_noncoding(dna, num_trials):
    """ Computes the maximum length of the longest ORF over num_trials shuffles
        of the specfied DNA sequence

        dna: a DNA sequence
        num_trials: the number of random shuffles
        returns: the maximum length longest ORF 
    """
    num_trials=1000
    import random
    i=0
    frames3 =[]
    listofshuffled=[]
    while i <num_trials:
        listofshuffled.append(shuffle_string(dna))
        i+=1
    for i in listofshuffled:
        frames3.append(longest_ORF(i))
    return (max(frames3,key=len))
#longest_ORF_noncoding(dna, num_trials) 

longest_ORF_noncoding(load_seq('/home/eanbit2/mscbioinfo/introtoprog/python/mini-project-eunicenjuguna/data/X73525.fa'), 1000)
'ATGGGGTCTTGCCATTTCCTCCTTCTCAGCGTCTGTTTACAAGATGAAGTACACGCCGGTCCGTATCTGTCGGCCGAGCTACTTGTTGGCAAATGCGTGGATTTTGCAGAAGGTTACCAAAGCTCTGTTAGTCCGTCGACTCCACTAATTGCCAATTGCTCAGCGCAACTCACAGTCAGCACTCAAGCTCACGCTGCAGCTGCTCATTCTCTTCCAGGTAGTTGGGAGAGATGCACCTTCACTCAGCCTCTCGCTGAGAAAGCTCCTCCGCAAAGGGCCCATCTGATTAGACATTACTGTCTGGTCCTCCTCTTTTCTGATCAA'
def coding_strand_to_AA(dna):
    """ Computes the Protein encoded by a sequence of DNA.  This function
        does not check for start and stop codons (it assumes that the input
        DNA sequence represents an protein coding region).

        dna: a DNA sequence represented as a string
        returns: a string containing the sequence of amino acids encoded by the
                 the input DNA fragment

        >>> coding_strand_to_AA("ATGCGA")
        'MR'
        >>> coding_strand_to_AA("ATGCCCGCTTT")
        'MPA'
    """
    aa = ['F', 'L', 'I', 'M', 'V', 'S', 'P', 'T', 'A', 'Y',
      '|', 'H', 'Q', 'N', 'K', 'D', 'E', 'C', 'W', 'R',
      'G']

    codons = [['TTT', 'TTC'],
              ['TTA', 'TTG', 'CTT', 'CTC', 'CTA', 'CTG'],
              ['ATT', 'ATC', 'ATA'],
              ['ATG'],
              ['GTT', 'GTC', 'GTA', 'GTG'],
              ['TCT', 'TCC', 'TCA', 'TCG', 'AGT', 'AGC'],
              ['CCT', 'CCC', 'CCA', 'CCG'],
              ['ACT', 'ACC', 'ACA', 'ACG'],
              ['GCT', 'GCC', 'GCA', 'GCG'],
              ['TAT', 'TAC'],
              ['TAA', 'TAG', 'TGA'],
              ['CAT', 'CAC'],
              ['CAA', 'CAG'],
              ['AAT', 'AAC'],
              ['AAA', 'AAG'],
              ['GAT', 'GAC'],
              ['GAA', 'GAG'],
              ['TGT', 'TGC'],
              ['TGG'],
              ['CGT', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'],
              ['GGT', 'GGC', 'GGA', 'GGG']]

    # create a dictionary lookup table for mapping codons into amino acids
    aa_table = {}
    for i in range(len(aa)):
        for codon in codons[i]:
            aa_table[codon] = aa[i]
    init_pos = 0
    return''.join([aa_table[dna[pos:pos + 3]] for pos in range (init_pos, len(dna) -2, 3)])
    pass
#coding_strand_to_AA(dna)
coding_strand_to_AA(load_seq('/home/eanbit2/mscbioinfo/introtoprog/python/mini-project-eunicenjuguna/data/X73525.fa'))
'GSDRENR|AFYP|SGAD|RRTRY|CRTAVLRFTRWRP|TADYRCARD|RVIDLWRRPANGHFCLRRMR|DHADAYADRANGGGCLCYRSYR|TRP|GH|IRGYVARFA|ERKMRAGFCHFRFPLGRSLQCGATGDNRSGIFSRPGKTGRAFYRFHDPLCACFARRGTGVGRASGSSRLSRLRIR|FAPLAGTPRGDQRGKHYCLLYGTAGKRGRGGPDGG|NSLYP|RSPVSEQKAGRAGTLPGNRCTEKRKPRFWTSHDADTC|TGICRA|INDAFGRAPAFH|LGRISSWRKYR|RSGDADAG|PESLVMPAGSAVFIL||HVERYECIR|PELKYCSGAVRYFIHSVSRYYFAIRMRTAGCRPRRRRSLNK|RV|NCY|IRCVQKTDSSVVRKFIRYYVSSLLFAGR|KI|NSRLYKFRKNGASWKRKGKSFRKKVNIGCAKKGTINAG|SVRKDSISSERYSRKRPSQRR|FNGRCVSCQFIREHFTAAAG|GWRFIRSIKKSGGKT|DRIFR||KRSRLWRCFRNA|RNGFTVITGGMATWRASEIRTSQWQRFWSAS|RKKRTQDC|KTVESHC|KICRFDLCGGQSR|IRSVAIV|K|AVRKRKR|KIIC|FKSCGIR||SNR|RHGNL|R|YQGIAWG||SHRGRRRS|RGRAAGAGCRTCPNGRSQYR|A|R|RS|KG|RCFSASAATNHYRRS|PINRRR|KNAFSGAIKADDDYFSHCRWRERRG|LADLPFSALGK|LFRQYSGAASRGVFVNTVKYAG|TSFA|SMAKR|SPALAPDARRSTKSAAATAQTAIWRGG|RLMSLRVRQIDRREWLLAQTATECQRHGREATLEYPTRQGMWVRLSDAEKRWSAWIKPGDWLEHVSPALAGAAVSAGAEHLVVPWLAATERPFELPVPHLSCRRLCVENPVPGSALPEGKLLHIMSDRGGLWFEHLPELPAVGGGRPKMLRWPLRFVIGSSDTQRSLLGRIGIGDVLLIRTSRAEVYCYAKKLGHFNRVEGGIIVETLDIQHIEEENNTTETAETLPGLNQLPVKLEFVLYRKNVTLAELEAMGQQQLLSLPTNAELNVEIMANGVLLGNGELVQMNDTLGVEIHEWLSESGNGE|YLINCLTGIFHPVAIYYCVRNLFR|IFYCICHGA|RPGITADTFKYDA|RRRIAAFYVCYVAHNA|CLRLF|GRRCHL||YFIIK|TR||RSGWLSRLSDQIFRSRVSSVF|KRATEASVWRRDRDGKA|QR|N|KTFNICVITCLCAERNKKRV|NWFLSLFALCRRRPGGIQRATGAGDDDDESGDDIYTY|AGAFCRA|WLDLTV|GIDITVYGHCNMTSLRDGIVKWMI|CLQVIRRSILF|SCQGGRRLSQRLSASW|GYSRR|RNYRNRRCLLALNYLACVYACFYCLAGMAKFYSLTGVR|YSWRWLRGKKCFTRCTLKFITWLRLRR|GLLAWRRFFSSCRF|IAGY|AVRRETPLSSWWHWEYGRMH|TRRRRFYRWR|SR|FCKKRR|ASCWAVCCHGLFGLCMRWVVLSITSEGQR|VVVSIRQTVLIPRKWLIS|ICLPLSFIYKTAVWSRWLTC|IKAISYAIR|TSARLHYRRY|RLLIRWLKTPWFWPVRWY|CCCCQKYSWVYCRALLRK|TLLRFH|R|KAVLPF|LCCFISLRYYRTMYCDSLSRPQG|AVGFTSEGRRMSSNKTEKPTKKRLEDSAKKGQSFKSKDLIIACLTLGGIAYLVSYGSFNEFMGIIKIIIADNFDQSMADYSLAVFGIGLKYLIPFMLLCLVCSALPALLQAGFVLATEALKPNLSALNPVEGAKKLFSMRTVKDTVKTLLYLSSFVVAAIICWKKYKVEIFSQLNGNIVGIAVIWRELLLALVLTCLACALIVLLLDAIAEYFLTMKDMKMDKEEVKREMKEQEGNPEVKSKRREVHMEILSEQVKSDIENSRLIVANPTHITIGIYFKPELMPIPMISVYETNQRALAVRAYAEKVGVPVIVDIKLARSLFKTHRRYDLVSLEEIDEVLRLLVWLEEVENAGKDVIQPQENEVRH|AA|GSSDVFIGRFLNVH|PPSGFNNCIR|TQSLSSTKSVTDNRSK|WIIKIMSAKNVLRK|FGMPLVKAPR|KTFMGS'
def gene_finder(dna):
    """ Returns the amino acid sequences that are likely coded by the specified dna

        dna: a DNA sequence
        returns: a list of all amino acid sequences coded by the sequence dna.
    """
    my_genes=[]
    for i in find_all_ORFs_both_strands(dna):
        my_genes.append(coding_strand_to_AA(i))
    return my_genes
#gene_finder(dna)
gene_finder(load_seq('/home/eanbit2/mscbioinfo/introtoprog/python/mini-project-eunicenjuguna/data/X73525.fa'))
['MR',
 'MRAGFCHFRFPLGRSLQCGATGDNRSGIFSRPGKTGRAFYRFHDPLCACFARRGTGVGRASGSSRLSRLRIR',
 'MPAGSAVFIL',
 'MRTAGCRPRRRRSLNK',
 'MATWRASEIRTSQWQRFWSAS',
 'MAKR',
 'MSLRVRQIDRREWLLAQTATECQRHGREATLEYPTRQGMWVRLSDAEKRWSAWIKPGDWLEHVSPALAGAAVSAGAEHLVVPWLAATERPFELPVPHLSCRRLCVENPVPGSALPEGKLLHIMSDRGGLWFEHLPELPAVGGGRPKMLRWPLRFVIGSSDTQRSLLGRIGIGDVLLIRTSRAEVYCYAKKLGHFNRVEGGIIVETLDIQHIEEENNTTETAETLPGLNQLPVKLEFVLYRKNVTLAELEAMGQQQLLSLPTNAELNVEIMANGVLLGNGELVQMNDTLGVEIHEWLSESGNGE',
 'MWVRLSDAEKRWSAWIKPGDWLEHVSPALAGAAVSAGAEHLVVPWLAATERPFELPVPHLSCRRLCVENPVPGSALPEGKLLHIMSDRGGLWFEHLPELPAVGGGRPKMLRWPLRFVIGSSDTQRSLLGRIGIGDVLLIRTSRAEVYCYAKKLGHFNRVEGGIIVETLDIQHIEEENNTTETAETLPGLNQLPVKLEFVLYRKNVTLAELEAMGQQQLLSLPTNAELNVEIMANGVLLGNGELVQMNDTLGVEIHEWLSESGNGE',
 'MSDRGGLWFEHLPELPAVGGGRPKMLRWPLRFVIGSSDTQRSLLGRIGIGDVLLIRTSRAEVYCYAKKLGHFNRVEGGIIVETLDIQHIEEENNTTETAETLPGLNQLPVKLEFVLYRKNVTLAELEAMGQQQLLSLPTNAELNVEIMANGVLLGNGELVQMNDTLGVEIHEWLSESGNGE',
 'MLRWPLRFVIGSSDTQRSLLGRIGIGDVLLIRTSRAEVYCYAKKLGHFNRVEGGIIVETLDIQHIEEENNTTETAETLPGLNQLPVKLEFVLYRKNVTLAELEAMGQQQLLSLPTNAELNVEIMANGVLLGNGELVQMNDTLGVEIHEWLSESGNGE',
 'MGQQQLLSLPTNAELNVEIMANGVLLGNGELVQMNDTLGVEIHEWLSESGNGE',
 'MANGVLLGNGELVQMNDTLGVEIHEWLSESGNGE',
 'MNDTLGVEIHEWLSESGNGE',
 'MTSLRDGIVKWMI',
 'MI',
 'MAKFYSLTGVR',
 'MH',
 'MRWVVLSITSEGQR',
 'MYCDSLSRPQG',
 'MSSNKTEKPTKKRLEDSAKKGQSFKSKDLIIACLTLGGIAYLVSYGSFNEFMGIIKIIIADNFDQSMADYSLAVFGIGLKYLIPFMLLCLVCSALPALLQAGFVLATEALKPNLSALNPVEGAKKLFSMRTVKDTVKTLLYLSSFVVAAIICWKKYKVEIFSQLNGNIVGIAVIWRELLLALVLTCLACALIVLLLDAIAEYFLTMKDMKMDKEEVKREMKEQEGNPEVKSKRREVHMEILSEQVKSDIENSRLIVANPTHITIGIYFKPELMPIPMISVYETNQRALAVRAYAEKVGVPVIVDIKLARSLFKTHRRYDLVSLEEIDEVLRLLVWLEEVENAGKDVIQPQENEVRH',
 'MGIIKIIIADNFDQSMADYSLAVFGIGLKYLIPFMLLCLVCSALPALLQAGFVLATEALKPNLSALNPVEGAKKLFSMRTVKDTVKTLLYLSSFVVAAIICWKKYKVEIFSQLNGNIVGIAVIWRELLLALVLTCLACALIVLLLDAIAEYFLTMKDMKMDKEEVKREMKEQEGNPEVKSKRREVHMEILSEQVKSDIENSRLIVANPTHITIGIYFKPELMPIPMISVYETNQRALAVRAYAEKVGVPVIVDIKLARSLFKTHRRYDLVSLEEIDEVLRLLVWLEEVENAGKDVIQPQENEVRH',
 'MADYSLAVFGIGLKYLIPFMLLCLVCSALPALLQAGFVLATEALKPNLSALNPVEGAKKLFSMRTVKDTVKTLLYLSSFVVAAIICWKKYKVEIFSQLNGNIVGIAVIWRELLLALVLTCLACALIVLLLDAIAEYFLTMKDMKMDKEEVKREMKEQEGNPEVKSKRREVHMEILSEQVKSDIENSRLIVANPTHITIGIYFKPELMPIPMISVYETNQRALAVRAYAEKVGVPVIVDIKLARSLFKTHRRYDLVSLEEIDEVLRLLVWLEEVENAGKDVIQPQENEVRH',
 'MLLCLVCSALPALLQAGFVLATEALKPNLSALNPVEGAKKLFSMRTVKDTVKTLLYLSSFVVAAIICWKKYKVEIFSQLNGNIVGIAVIWRELLLALVLTCLACALIVLLLDAIAEYFLTMKDMKMDKEEVKREMKEQEGNPEVKSKRREVHMEILSEQVKSDIENSRLIVANPTHITIGIYFKPELMPIPMISVYETNQRALAVRAYAEKVGVPVIVDIKLARSLFKTHRRYDLVSLEEIDEVLRLLVWLEEVENAGKDVIQPQENEVRH',
 'MRTVKDTVKTLLYLSSFVVAAIICWKKYKVEIFSQLNGNIVGIAVIWRELLLALVLTCLACALIVLLLDAIAEYFLTMKDMKMDKEEVKREMKEQEGNPEVKSKRREVHMEILSEQVKSDIENSRLIVANPTHITIGIYFKPELMPIPMISVYETNQRALAVRAYAEKVGVPVIVDIKLARSLFKTHRRYDLVSLEEIDEVLRLLVWLEEVENAGKDVIQPQENEVRH',
 'MKDMKMDKEEVKREMKEQEGNPEVKSKRREVHMEILSEQVKSDIENSRLIVANPTHITIGIYFKPELMPIPMISVYETNQRALAVRAYAEKVGVPVIVDIKLARSLFKTHRRYDLVSLEEIDEVLRLLVWLEEVENAGKDVIQPQENEVRH',
 'MKMDKEEVKREMKEQEGNPEVKSKRREVHMEILSEQVKSDIENSRLIVANPTHITIGIYFKPELMPIPMISVYETNQRALAVRAYAEKVGVPVIVDIKLARSLFKTHRRYDLVSLEEIDEVLRLLVWLEEVENAGKDVIQPQENEVRH',
 'MDKEEVKREMKEQEGNPEVKSKRREVHMEILSEQVKSDIENSRLIVANPTHITIGIYFKPELMPIPMISVYETNQRALAVRAYAEKVGVPVIVDIKLARSLFKTHRRYDLVSLEEIDEVLRLLVWLEEVENAGKDVIQPQENEVRH',
 'MKEQEGNPEVKSKRREVHMEILSEQVKSDIENSRLIVANPTHITIGIYFKPELMPIPMISVYETNQRALAVRAYAEKVGVPVIVDIKLARSLFKTHRRYDLVSLEEIDEVLRLLVWLEEVENAGKDVIQPQENEVRH',
 'MEILSEQVKSDIENSRLIVANPTHITIGIYFKPELMPIPMISVYETNQRALAVRAYAEKVGVPVIVDIKLARSLFKTHRRYDLVSLEEIDEVLRLLVWLEEVENAGKDVIQPQENEVRH',
 'MPIPMISVYETNQRALAVRAYAEKVGVPVIVDIKLARSLFKTHRRYDLVSLEEIDEVLRLLVWLEEVENAGKDVIQPQENEVRH',
 'MISVYETNQRALAVRAYAEKVGVPVIVDIKLARSLFKTHRRYDLVSLEEIDEVLRLLVWLEEVENAGKDVIQPQENEVRH',
 'MSAKNVLRK',
 'MPLVKAPR',
 'MGS',
 'MGIFASAGCGKTMLMHMLIEQTEADVFVIGLIGERGREVTEFVDMLRASHKKEKCVLVFATSDFPSVDRCNAAQLATTVAEYFRDQGKRVVLFIDSMTRYARALRDVALASGERPARRGYPASVFDNLPRLLERPGATSEGSITAFYTVLLESEEEADPMADEIRSILDGHLYLSRKLAGQGHYPAIDVLKSVSRVFGQVTTPTHAEQASAVRKLMTRLEELQLFIDLGEYRPGENIDNDRAMQMRDSLKAWLCQPVAQYSSFDDTLSGMNAFADQN',
 'MLMHMLIEQTEADVFVIGLIGERGREVTEFVDMLRASHKKEKCVLVFATSDFPSVDRCNAAQLATTVAEYFRDQGKRVVLFIDSMTRYARALRDVALASGERPARRGYPASVFDNLPRLLERPGATSEGSITAFYTVLLESEEEADPMADEIRSILDGHLYLSRKLAGQGHYPAIDVLKSVSRVFGQVTTPTHAEQASAVRKLMTRLEELQLFIDLGEYRPGENIDNDRAMQMRDSLKAWLCQPVAQYSSFDDTLSGMNAFADQN',
 'MHMLIEQTEADVFVIGLIGERGREVTEFVDMLRASHKKEKCVLVFATSDFPSVDRCNAAQLATTVAEYFRDQGKRVVLFIDSMTRYARALRDVALASGERPARRGYPASVFDNLPRLLERPGATSEGSITAFYTVLLESEEEADPMADEIRSILDGHLYLSRKLAGQGHYPAIDVLKSVSRVFGQVTTPTHAEQASAVRKLMTRLEELQLFIDLGEYRPGENIDNDRAMQMRDSLKAWLCQPVAQYSSFDDTLSGMNAFADQN',
 'MLIEQTEADVFVIGLIGERGREVTEFVDMLRASHKKEKCVLVFATSDFPSVDRCNAAQLATTVAEYFRDQGKRVVLFIDSMTRYARALRDVALASGERPARRGYPASVFDNLPRLLERPGATSEGSITAFYTVLLESEEEADPMADEIRSILDGHLYLSRKLAGQGHYPAIDVLKSVSRVFGQVTTPTHAEQASAVRKLMTRLEELQLFIDLGEYRPGENIDNDRAMQMRDSLKAWLCQPVAQYSSFDDTLSGMNAFADQN',
 'MLRASHKKEKCVLVFATSDFPSVDRCNAAQLATTVAEYFRDQGKRVVLFIDSMTRYARALRDVALASGERPARRGYPASVFDNLPRLLERPGATSEGSITAFYTVLLESEEEADPMADEIRSILDGHLYLSRKLAGQGHYPAIDVLKSVSRVFGQVTTPTHAEQASAVRKLMTRLEELQLFIDLGEYRPGENIDNDRAMQMRDSLKAWLCQPVAQYSSFDDTLSGMNAFADQN',
 'MTRYARALRDVALASGERPARRGYPASVFDNLPRLLERPGATSEGSITAFYTVLLESEEEADPMADEIRSILDGHLYLSRKLAGQGHYPAIDVLKSVSRVFGQVTTPTHAEQASAVRKLMTRLEELQLFIDLGEYRPGENIDNDRAMQMRDSLKAWLCQPVAQYSSFDDTLSGMNAFADQN',
 'MADEIRSILDGHLYLSRKLAGQGHYPAIDVLKSVSRVFGQVTTPTHAEQASAVRKLMTRLEELQLFIDLGEYRPGENIDNDRAMQMRDSLKAWLCQPVAQYSSFDDTLSGMNAFADQN',
 'MTRLEELQLFIDLGEYRPGENIDNDRAMQMRDSLKAWLCQPVAQYSSFDDTLSGMNAFADQN',
 'MQMRDSLKAWLCQPVAQYSSFDDTLSGMNAFADQN',
 'MRDSLKAWLCQPVAQYSSFDDTLSGMNAFADQN',
 'MNAFADQN',
 'MGDVSAVSSSGNILLPQQDEVGGLSEALKKAVEKHKTEYSGDKKDRDYGDAFVMHKETALPLLLAAWRHGAPAKSEHHNGNVSGLHHNGKSELRIAEKLLKVTAEKSVGLISAEAKVDKSAALLSSKNRPLESVSGKKLSADLKAVESVSEVTDNATGISDDNIKALPGDNKAIAGEGVRKEGAPLARDVAPARMAAANTGKPEDKDHKKVKDVSQLPLQPTTIADLSQLTGGDEKMPLAAQSKPMMTIFPTADGVKGEDSSLTYRFQRWGNDYSVNIQARQAGEFSLIPSNTQVEHRLHDQWQNGNPQRWHLTRDDQQNPQQQQHRQQSGEEDDA',
 'MHKETALPLLLAAWRHGAPAKSEHHNGNVSGLHHNGKSELRIAEKLLKVTAEKSVGLISAEAKVDKSAALLSSKNRPLESVSGKKLSADLKAVESVSEVTDNATGISDDNIKALPGDNKAIAGEGVRKEGAPLARDVAPARMAAANTGKPEDKDHKKVKDVSQLPLQPTTIADLSQLTGGDEKMPLAAQSKPMMTIFPTADGVKGEDSSLTYRFQRWGNDYSVNIQARQAGEFSLIPSNTQVEHRLHDQWQNGNPQRWHLTRDDQQNPQQQQHRQQSGEEDDA',
 'MAAANTGKPEDKDHKKVKDVSQLPLQPTTIADLSQLTGGDEKMPLAAQSKPMMTIFPTADGVKGEDSSLTYRFQRWGNDYSVNIQARQAGEFSLIPSNTQVEHRLHDQWQNGNPQRWHLTRDDQQNPQQQQHRQQSGEEDDA',
 'MPLAAQSKPMMTIFPTADGVKGEDSSLTYRFQRWGNDYSVNIQARQAGEFSLIPSNTQVEHRLHDQWQNGNPQRWHLTRDDQQNPQQQQHRQQSGEEDDA',
 'MMTIFPTADGVKGEDSSLTYRFQRWGNDYSVNIQARQAGEFSLIPSNTQVEHRLHDQWQNGNPQRWHLTRDDQQNPQQQQHRQQSGEEDDA',
 'MTIFPTADGVKGEDSSLTYRFQRWGNDYSVNIQARQAGEFSLIPSNTQVEHRLHDQWQNGNPQRWHLTRDDQQNPQQQQHRQQSGEEDDA',
 'MAGKRRWNIRRDRECGFG',
 'MQKNGGRPGLNLGTGLSMSLPLWLGRRFLLALSTWSFPGLLQQSDRLSCPCRICPVGVYA',
 'MSLPLWLGRRFLLALSTWSFPGLLQQSDRLSCPCRICPVGVYA',
 'MYS',
 'MLNLTLKLWRMVFCWVMANWYR',
 'MVFCWVMANWYR',
 'MANWYR',
 'MTP',
 'MNG',
 'MGNDISLIALLAFSTLLPFIIASGTCFVKFSIVFVMVRNALGLQQIPSNMTLNGVALLLSMFVMWPIMHDAYVYFEDEDVTFNDISSLSKHVDEGLDGYRDYLIKYSDRELVQFFENAQLKRQYGEETETVKRDKDEIEKPSIFALLPAYALSEIKSAFKIGFYLYLPFVVVDLVVSSVLLALGMMMMSPVTISTPIKLVLFVALDGWTLLSKGLILQYMDIAT',
 'MVRNALGLQQIPSNMTLNGVALLLSMFVMWPIMHDAYVYFEDEDVTFNDISSLSKHVDEGLDGYRDYLIKYSDRELVQFFENAQLKRQYGEETETVKRDKDEIEKPSIFALLPAYALSEIKSAFKIGFYLYLPFVVVDLVVSSVLLALGMMMMSPVTISTPIKLVLFVALDGWTLLSKGLILQYMDIAT',
 'MTLNGVALLLSMFVMWPIMHDAYVYFEDEDVTFNDISSLSKHVDEGLDGYRDYLIKYSDRELVQFFENAQLKRQYGEETETVKRDKDEIEKPSIFALLPAYALSEIKSAFKIGFYLYLPFVVVDLVVSSVLLALGMMMMSPVTISTPIKLVLFVALDGWTLLSKGLILQYMDIAT',
 'MFVMWPIMHDAYVYFEDEDVTFNDISSLSKHVDEGLDGYRDYLIKYSDRELVQFFENAQLKRQYGEETETVKRDKDEIEKPSIFALLPAYALSEIKSAFKIGFYLYLPFVVVDLVVSSVLLALGMMMMSPVTISTPIKLVLFVALDGWTLLSKGLILQYMDIAT',
 'MWPIMHDAYVYFEDEDVTFNDISSLSKHVDEGLDGYRDYLIKYSDRELVQFFENAQLKRQYGEETETVKRDKDEIEKPSIFALLPAYALSEIKSAFKIGFYLYLPFVVVDLVVSSVLLALGMMMMSPVTISTPIKLVLFVALDGWTLLSKGLILQYMDIAT',
 'MHDAYVYFEDEDVTFNDISSLSKHVDEGLDGYRDYLIKYSDRELVQFFENAQLKRQYGEETETVKRDKDEIEKPSIFALLPAYALSEIKSAFKIGFYLYLPFVVVDLVVSSVLLALGMMMMSPVTISTPIKLVLFVALDGWTLLSKGLILQYMDIAT',
 'MMMMSPVTISTPIKLVLFVALDGWTLLSKGLILQYMDIAT',
 'MMMSPVTISTPIKLVLFVALDGWTLLSKGLILQYMDIAT',
 'MMSPVTISTPIKLVLFVALDGWTLLSKGLILQYMDIAT',
 'MSPVTISTPIKLVLFVALDGWTLLSKGLILQYMDIAT',
 'MDIAT',
 'MLVFTVWLVWRSFTLLRASGDIPGVG',
 'MAACIERGAAVFIGGDDPVSSARSGGRRHAGLSAVMAFLGYACAGLYYR',
 'MAFLGYACAGLYYR',
 'MRSDERVHAFITAAINVY',
 'MAHLMSLWG',
 'MSLWG',
 'MAIL',
 'MLLRNIS',
 'MKRISAHWPSAPMRRRLAYL',
 'MRRRLAYL',
 'MIW',
 'MKFYVFWFGWKR',
 'MYSLGVF',
 'MFTNHRRGLITASDKRSR',
 'MSHRRLTLHALASVNR',
 'MSLLSVLSVNEAVRSLNSWICCALRIRKKNACWFLPLPISPRSIAAMRRNWRQP',
 'MRRNWRQP',
 'MRVLCETWHWRRESVRLVEVIPPPYSIICPACWNAQGRPAREALLPFIRYCWKARKRRTRWRMKFALSLTVTCI',
 'MKFALSLTVTCI',
 'MY',
 'MLNRHLPCVN',
 'MTR',
 'MHSLTRIKVLQRRCTVFHSQCESILLRYQDEDRGLQAEEEAILEQIAGLKLLLDTLRAENRQLSREEIYTLLRKQSIVRRQIKDLELQIIQIQEKRSELEKKREEFQKKSKYWLRKEGNYQRWIIRQKRFYIQREIQQEEAESEEII',
 'MCQLSVHPGTFYCRSRMRLAVYQKH',
 'MRLAVYQKH',
 'MAMLS',
 'MLS',
 'MARQRNQNITMATFLVCIITEKANSGLLKNC',
 'MATFLVCIITEKANSGLLKNC',
 'MSHLPEWPQPIPVSLKIKIIKRLKMFLSFRCNQPLSPILAN',
 'MFLSFRCNQPLSPILAN',
 'MKKCL',
 'MA',
 'MTIPSIFRRGKQGSFR',
 'MINGKTVIPSAGT',
 'MTPDVIACETD',
 'MAIGANRDRMPAPWPGSDAGISDATGNVGSVERCRKTVVGLD',
 'MPAPWPGSDAGISDATGNVGSVERCRKTVVGLD',
 'MRREPRTGKRAAGREIVAHYERSGRPVV',
 'MAERVW',
 'MISH',
 'MMPTSILRTKMSPLMIFHH',
 'MPTSILRTKMSPLMIFHH',
 'MSPLMIFHH',
 'MIFHH',
 'MKVWMVIAII',
 'MVIAII',
 'MEKRPRR',
 'MKLKNLQYLRYYLLMR',
 'MR',
 'MAGPYCLRD',
 'MDDLVFAGNKALYLVLILSGWPTIVATIIGLLVGLFQTVTQLQEQTLPFGIKLLGVCLCLFLLSGWYGEVLLSYGRQVIFLALAKG',
 'MFYALYFEIHHLVASAALGFARVAPIFFFLPFLNSGVLSGAPRNAIIILVALGVWPHALNEAPPFLSVAMIPLVLQEAAVGVMLGCLLSWPFWVMHALGCIIDNQRGATLSSSIDPANGIDTSEMANFLNMFAAVVYLQNGGLVTMVDVLNKSYQLCDPMNECTPSLPPLLTFINQVAQNALVLASPVVLVLLLSEVFLGLLSRFAPQMNAFAISLTVKSGIAVLIMLLYFSPVLPDNVLRLSFQATGLSSWFYERGATHVLE',
 'MIPLVLQEAAVGVMLGCLLSWPFWVMHALGCIIDNQRGATLSSSIDPANGIDTSEMANFLNMFAAVVYLQNGGLVTMVDVLNKSYQLCDPMNECTPSLPPLLTFINQVAQNALVLASPVVLVLLLSEVFLGLLSRFAPQMNAFAISLTVKSGIAVLIMLLYFSPVLPDNVLRLSFQATGLSSWFYERGATHVLE',
 'MLGCLLSWPFWVMHALGCIIDNQRGATLSSSIDPANGIDTSEMANFLNMFAAVVYLQNGGLVTMVDVLNKSYQLCDPMNECTPSLPPLLTFINQVAQNALVLASPVVLVLLLSEVFLGLLSRFAPQMNAFAISLTVKSGIAVLIMLLYFSPVLPDNVLRLSFQATGLSSWFYERGATHVLE',
 'MHALGCIIDNQRGATLSSSIDPANGIDTSEMANFLNMFAAVVYLQNGGLVTMVDVLNKSYQLCDPMNECTPSLPPLLTFINQVAQNALVLASPVVLVLLLSEVFLGLLSRFAPQMNAFAISLTVKSGIAVLIMLLYFSPVLPDNVLRLSFQATGLSSWFYERGATHVLE',
 'MANFLNMFAAVVYLQNGGLVTMVDVLNKSYQLCDPMNECTPSLPPLLTFINQVAQNALVLASPVVLVLLLSEVFLGLLSRFAPQMNAFAISLTVKSGIAVLIMLLYFSPVLPDNVLRLSFQATGLSSWFYERGATHVLE',
 'MFAAVVYLQNGGLVTMVDVLNKSYQLCDPMNECTPSLPPLLTFINQVAQNALVLASPVVLVLLLSEVFLGLLSRFAPQMNAFAISLTVKSGIAVLIMLLYFSPVLPDNVLRLSFQATGLSSWFYERGATHVLE',
 'MVDVLNKSYQLCDPMNECTPSLPPLLTFINQVAQNALVLASPVVLVLLLSEVFLGLLSRFAPQMNAFAISLTVKSGIAVLIMLLYFSPVLPDNVLRLSFQATGLSSWFYERGATHVLE',
 'MNECTPSLPPLLTFINQVAQNALVLASPVVLVLLLSEVFLGLLSRFAPQMNAFAISLTVKSGIAVLIMLLYFSPVLPDNVLRLSFQATGLSSWFYERGATHVLE',
 'MNAFAISLTVKSGIAVLIMLLYFSPVLPDNVLRLSFQATGLSSWFYERGATHVLE',
 'MLLYFSPVLPDNVLRLSFQATGLSSWFYERGATHVLE',
 'MDYQNNVSEERVAEMIWDAVSEGATLKDVHGI',
 'MIWDAVSEGATLKDVHGI',
 'MPYLVFLWLNNVFARVFYLFQPNQKT',
 'MGFEKTARQFDIDDHRYANLLRIGADGQCALIRFIHRDHRNRHQFGFKINPDRNMRGVGNNQA',
 'MRGVGNNQA',
 'MNFTSFRFNLWVPFLLLHFTLHFFLIHFHIFHGQEIFRNSIQ',
 'MKACTRSSDRIADSFYLTRQPS',
 'MTADSPA',
 'MHAAILPMPPG',
 'MPPG',
 'MSIYCNINPLDSKVQPSSATKSTSLIGVDIVTGLIIIIPSASSTLDTTRSTTTKGK',
 'MGHITNIESSNATPLSVIFEGICCNPRALRTMTNTIENLTKQVPDAIINGNRVENASKAINEISFPITRLAQPFMDLNA',
 'MTNTIENLTKQVPDAIINGNRVENASKAINEISFPITRLAQPFMDLNA',
 'MDLNA',
 'MLNI',
 'MT',
 'MLKPQAAPIAHNVQQFPFRQRASRYGVLYA',
 'MRHGQLKRSLCCSKPGNDQVLSASRNRRPSQSGRDMLKPVPRFNPGRPPFFCIAQPNPHSLSRRIFQRRFPAMALAFCRGLRQ',
 'MLKPVPRFNPGRPPFFCIAQPNPHSLSRRIFQRRFPAMALAFCRGLRQ',
 'MALAFCRGLRQ',
 'MALLSPGNALILSSEIPVALSVTSLTDSTAFKSADNFLPLTLSNGLFLDDSNAADLSTLASAEIKPTDFSAVTFNSFSAILSSLFPL',
 'MFPDELTADTSPIKLSPLTRPLPAVSLAGYRIFSDGLSSVDSSLLCATNIYFFSETLPFSFPARSVFPEFV',
 'MKSWSSSKRVINLRTADACSACVGVVTCPKTRLTLFSTSIAG',
 'MLPSLVAPGRSSKRGKLSNTEAG',
 'MESIKSTTRFPWSRKYSATVVASCAALQRSTEGKSEVAKTSTHFSFLCEARNISTNSVTSRPRSPIRPITKTSASVCSISICISMVLPHPAEAKMPIRWPTPQVNNPSIARTPVISGSRTPTREA',
 'MVLPHPAEAKMPIRWPTPQVNNPSIARTPVISGSRTPTREA',
 'MPIRWPTPQVNNPSIARTPVISGSRTPTREA',
 'MNVF',
 'MQLLNPDGG',
 'MNTSLLPYAAQCRTSFSCG',
 'MVRKYSAIASNNKTINAQARQVNTNARRSSRQMTAIPTILPFS',
 'MTAIPTILPFS',
 'MMAATTKDERYSRVLTVSLTVRILKSFFAPSTGFNADKLGFNASVASTKPACNNAGNAEHTKQSSINGIRYFNPIPKTAKL',
 'MAATTKDERYSRVLTVSLTVRILKSFFAPSTGFNADKLGFNASVASTKPACNNAGNAEHTKQSSINGIRYFNPIPKTAKL',
 'ML',
 'MILIIPINSLNEPYDTR',
 'MRSLLLNDWPFLAESSSRFLVGFSVLFEDMRRPSLVKPTA',
 'MRRPSLVKPTA',
 'MHNPKRP',
 'MRPYSQCHQDDNGVSRRTA',
 'MNFKVQRVKHFLPLSQRQEYHLTPVRE',
 'MLQCPYTVISIP',
 'MMKYH',
 'MKYH',
 'MRRR',
 'MATGWKMPVRQLMRYHSPLPDSLSHSWISTPKVSFICTSSPLPSKTPFAIISTLSSALVGSDNSCCCPMASSSARVTFLRYKTNSSLTGN',
 'MPVRQLMRYHSPLPDSLSHSWISTPKVSFICTSSPLPSKTPFAIISTLSSALVGSDNSCCCPMASSSARVTFLRYKTNSSLTGN',
 'MRYHSPLPDSLSHSWISTPKVSFICTSSPLPSKTPFAIISTLSSALVGSDNSCCCPMASSSARVTFLRYKTNSSLTGN',
 'MASSSARVTFLRYKTNSSLTGN',
 'MCNNFPSGSALPGTGFSTHKRRQDKCGTGNSNGRSVAASQGTTRCSAPAETAAPARAGETCSSQSPGLIQADHRFSASLNRTHIPCRVGYSSVASRPWRWHSVAVCANSHSRRSICLTRNDIRRHPPRQIAVCAVAAADFVDRLASGASAGDYRFAIDHANDVQPAYLTVLTKTPLLAAPEY',
 'MP',
 'MPPVITVKPFLYALRKHRHSRDLFYHRNILSYVFPPLFLMLLINRQPHPAAAVKCSRMN',
 'MLLINRQPHPAAAVKCSRMN',
 'MN',
 'MKYRTAPLQYFNSGQRMHSYRSTCHQRMNTALPAGITRLSGYPASASPDRYRYFRQDDILPSQ',
 'MHSYRSTCHQRMNTALPAGITRLSGYPASASPDRYRYFRQDDILPSQ',
 'MNTALPAGITRLSGYPASASPDRYRYFRQDDILPSQ',
 'MPVQHVSAS',
 'MRKQGKLIPMRGEVHAK',
 'MRGEVHAK',
 'MRDTVGF',
 'MLLSPAQNRPVITPVMRNTLSRAA',
 'MRNTLSRAA',
 'MESDILTLSQKRPNCSQPCSDQNYPQ',
 'MSHTTPDRQFLPASGRR',
 'MTGLF',
 'MTPTAASCRTNGIIATDKNGGASFNACGHTPNATRMIMAFLGAPLNTPLFKNGRKKKIGATRANPSAADATR',
 'MIMAFLGAPLNTPLFKNGRKKKIGATRANPSAADATR',
 'MAFLGAPLNTPLFKNGRKKKIGATRANPSAADATR',
 'MPKGSVCSCNCVTVWNNPTRRPIIVATIVGHPDRIKTR',
 'MMSCCNVHIL',
 'MSCCNVHIL',
 'MHYGPHNKHRKQQCDAVKRHI',
 'MTSGVILLARLLSVLLLLRILLIVSRQVPALGITVLPLIMQTMFNLRI',
 'MQTMFNLRI',
 'MFNLRI',
 'MIFIFRLTGIGCGHSGRCDIPRQRRALFTNAFARDGFIIPRQCLDIIVRDSRGVIGYFTNGFHSF',
 'MMQTRNVAIVMF',
 'MQTRNVAIVMF',
 'MF',
 'MSPCRQ',
 'MHYESIAIVAIFFITGIFCLMFFHRFF',
 'MFFHRFF',
 'MCRRRDLSKNAAYAFQYIDCRVMSLPGQLSAQIQVTVKDRANFIRHRVRLFLAFQQYRIKGSNASLAGRPWAFQQAGQIIEYGGGITSTSRTLSRRQCHVSQSTRITGHGIDKKHDPFSLVAKIFRYGCRQLRRIAAIDRGEIGSGKNQHAFFFLMRSAQHIHEFSDLTASFTDKTDNKDIRLRLLDQHMHQHGLTASCGGKNAHSLAYATGQ',
 'MSLPGQLSAQIQVTVKDRANFIRHRVRLFLAFQQYRIKGSNASLAGRPWAFQQAGQIIEYGGGITSTSRTLSRRQCHVSQSTRITGHGIDKKHDPFSLVAKIFRYGCRQLRRIAAIDRGEIGSGKNQHAFFFLMRSAQHIHEFSDLTASFTDKTDNKDIRLRLLDQHMHQHGLTASCGGKNAHSLAYATGQ',
 'MRSAQHIHEFSDLTASFTDKTDNKDIRLRLLDQHMHQHGLTASCGGKNAHSLAYATGQ',
 'MHQHGLTASCGGKNAHSLAYATGQ']
if __name__ == "__main__":
    import doctest
    doctest.testmod()
from load import load_nitrogenase_seq
>>> nitrogenase = load_nitrogenase_seq()
>>> print(nitrogenase)
ATGGGAAAACTCCGGCAGATCGCTTTCTACGGCAAGGGCGGGATCGGCAAGTCGACGACC
TCGCAGAACACCCTCGCGGCACTGGTCGAGATGGGTCAGAAGATCCTCATCGTCGGCTGC
GATCCCAAGGCCGACTCGACCCGCCTGATCCTGAACACCAAGCTGCAGGACACCGTGCTT
CACCTCGCCGCCGAAGCGGGCTCCGTCGAGGATCTCGAACTCGAGGATGTGGTCAAGATC
GGCTACAAGGGCATCAAATGCACCGAAGCCGGCGGGCCGGAGCCGGGCGTGGGCTGCGCG
GGCCGCGGCGTCATCACCGCCATCAACTTCCTGGAAGAGAACGGCGCCTATGACGACGTC
GACTACGTCTCCTACGACGTGCTGGGCGACGTGGTCTGCGGCGGCTTCGCCATGCCGATC
CGCGAGAACAAGGCGCAGGAAATCTACATCGTCATGTCGGGCGAGATGATGGCGCTCTAT
GCGGCCAACAACATCGCCAAGGGCATCCTGAAATACGCGAACTCGGGCGGCGTGCGCCTC
GGCGGCCTGATCTGCAACGAGCGCAAGACCGACCGCGAGCTGGAACTGGCCGAGGCCCTC
GCCGCGCGTCTGGGCTGCAAGATGATCCACTTCGTTCCGCGCGACAATATCGTGCAGCAC
GCCGAGCTCCGCCGCGAGACGGTCATCCAGTATGCGCCCGAGAGCAAGCAGGCGCAGGAA
TATCGCGAACTGGCCCGCAAGATCCACGAGAACTCGGGCAAGGGCGTGATCCCGACCCCG
ATCACCATGGAAGAGCTGGAAGAGATGCTGATGGATTTCGGCATCATGCAGTCCGAGGAA
GACCGGCTCGCCGCCATCGCCGCCGCCGAGGCCTGA