﻿Imports System.IO

''' <summary>
''' Compresses the data part of a .TAP file with a short-range back-reference scheme.
''' .TAP files suit this well because they use a limited set of byte values and the
''' data is highly repetitive.
''' </summary>
''' <remarks>
''' Layout of the file written by <see cref="Compress_TAP"/>:
'''  * bytes 0..2 : the first sync point (written as 0,0,0).
'''  * then a stream of either literal bytes, or 3-byte tokens
'''    (escape code, pointer into the history buffer, match length).
'''  * at every output offset that is a multiple of the sync-point interval (0x4000):
'''    a 3-byte sync point (MSB first) holding the index, relative to the end of the
'''    skipped header, of the last input byte consumed so far. The history buffer is
'''    reset to all zeros at that point, so a reader can start decoding there.
'''  * the last sync point in the file is overwritten with 0xFF,0xFF,0xFF.
''' The escape code is exposed through <see cref="Escapecode"/>; this routine does not
''' store it in the output file.
''' </remarks>
Public Class cls_Compress
    ''' <summary>Path of the file the compressed data is saved to.</summary>
    Public CompressedFilename As String = Application.StartupPath & "/compressed_TAP.DAT"

    ''' <summary>Size in bytes of the compressed file produced by the last successful run.</summary>
    Public CompressedFilesize As Long = 0

    ''' <summary>Size reduction of the last run as text (e.g. "42%"), or "could not be compressed".</summary>
    Public CompressionFactor As String = ""

    ''' <summary>
    ''' Escape code used in the compressed file: it indicates that the next 2 bytes are a
    ''' pointer/length pair referring to old data instead of normal data.
    ''' </summary>
    Public Escapecode As Byte

    'The first ... bytes of a TAP file (header) are skipped; they are neither scanned nor compressed.
    Private Const TapHeaderSize As Integer = 20

    'Size of the history buffer. Always use the same size when compressing and decoding!
    '256 is also the maximum range of an 8-bit pointer; smaller is possible but would
    'lower the effective compression factor.
    Private Const HistorySize As Integer = 256

    'Every time the output has grown by this many bytes a sync point is inserted. This makes
    'winding/rewinding much faster than decoding the file from the start.
    Private Const SyncPointInterval As Long = &H4000

    'Matches of this length or shorter are written as literals: a token costs 3 bytes.
    Private Const MinUsefulMatch As Integer = 3

    'The history buffer behaves like a shift register: every new item enters on the right
    'and everything else moves one place to the left.
    '
    '   step  shift register   ring buffer (same size)   ring buffer (different size)
    '    0       0000               0000                      00000000
    '    1       000a               a000                      a0000000
    '    2       00ab               ab00                      ab000000
    '    3       0abc               abc0                      abc00000
    '    4       abcd               abcd                      abcd0000
    '    5       bcde               ebcd                      abcde000
    '    6       cdef               efcd                      abcdef00
    '
    'A ring buffer of the same size holds the same set of items (only rotated), but a buffer
    'of a different size does not. That is why both sides must agree on the buffer size.

    ''' <summary>
    ''' Compresses the TAP file at <paramref name="filepath"/> into <see cref="CompressedFilename"/>.
    ''' </summary>
    ''' <param name="filepath">Path of the .TAP file to read.</param>
    ''' <returns>
    ''' True when the compressed file was written. False when every byte value 0..255 occurs
    ''' in the data, so no escape code is available; in that case a message box is shown and
    ''' no file is created or modified.
    ''' </returns>
    ''' <remarks>
    ''' On success <see cref="CompressedFilesize"/>, <see cref="CompressionFactor"/> and
    ''' <see cref="Escapecode"/> are updated. I/O exceptions from reading the input or writing
    ''' the output are not caught here.
    ''' </remarks>
    Public Function Compress_TAP(ByVal filepath As String) As Boolean
        CompressionFactor = "could not be compressed"   'default, overwritten by the actual factor on success

        'The whole file is held in memory for fast access during processing.
        Dim data As Byte() = File.ReadAllBytes(filepath)
        Dim payloadSize As Integer = data.Length - TapHeaderSize

        '*** search for an unused byte value to act as escape code ***
        Dim escapeValue As Integer = FindUnusedByteValue(data, TapHeaderSize)
        If escapeValue < 0 Then
            MessageBox.Show("This file cannot be compressed. No suitable escape code could be found.", "TAP file could not be compressed", MessageBoxButtons.OK)
            Return False    'nothing was written, so there is no sync point to patch either
        End If

        Debug.Print("value 0x" & escapeValue.ToString("X2") & " is not found in file and therefore suited for use as an escape code.")
        Escapecode = CByte(escapeValue)

        '+++ short range pointer compression +++
        Dim syncpointcounter As Integer = 0         'number of sync points written after the initial one
        Dim history(HistorySize - 1) As Byte        'the last 256 bytes processed; starts out as all zeros

        Using wFile As New FileStream(CompressedFilename, FileMode.Create)  'an existing file is overwritten
            'The first data in the file is the sync point for position 0, which is always 0,0,0.
            wFile.WriteByte(0)   'position in the uncompressed file MSB
            wFile.WriteByte(0)   '.SB
            wFile.WriteByte(0)   'LSB

            Dim pgrBar As New frm_ProgressBar
            Try
                pgrBar.Init("Compressing .TAP file", 0, payloadSize)
                pgrBar.Show()

                Dim position As Integer = TapHeaderSize     'index of the next input byte to encode
                While position < data.Length
                    Dim bestStart As Integer = 0
                    Dim bestLength As Integer = 0

                    'Sync points live at fixed output offsets. A token is 3 bytes long, so when we
                    'are within 3 bytes of the next sync point we only emit literals; that way the
                    'output lands exactly on the sync-point offset instead of jumping over it.
                    Dim nearSyncPoint As Boolean = (wFile.Position Mod SyncPointInterval) >= (SyncPointInterval - 3)
                    If Not nearSyncPoint Then
                        bestLength = FindBestMatch(history, data, position, bestStart)
                    End If

                    Dim lastConsumed As Integer     'index of the last input byte covered by this step
                    If bestLength > MinUsefulMatch Then
                        wFile.WriteByte(Escapecode)         'escape code to indicate the pointer
                        wFile.WriteByte(CByte(bestStart))   'pointer to the data block in the history buffer
                        wFile.WriteByte(CByte(bestLength))  'length of the block

                        'Shift the history left by the match length and append the matched input bytes.
                        Array.Copy(history, bestLength, history, 0, HistorySize - bestLength)
                        Array.Copy(data, position, history, HistorySize - bestLength, bestLength)
                        lastConsumed = position + bestLength - 1
                    Else
                        'Not part of a long enough sequence: store the byte as is.
                        wFile.WriteByte(data(position))

                        Array.Copy(history, 1, history, 0, HistorySize - 1)
                        history(HistorySize - 1) = data(position)
                        lastConsumed = position
                    End If

                    'Update the progress bar roughly every 10 KByte; doing it on every byte would
                    'slow this loop down significantly.
                    'NOTE(review): the value passed is the absolute file index while Init was given
                    'the payload size as maximum, and the update only fires when the index lands
                    'exactly on a multiple of 10240 - kept as is, confirm against frm_ProgressBar.
                    If (lastConsumed Mod 10240) = 0 Then
                        pgrBar.Value(lastConsumed)
                        Application.DoEvents()      'update the GUI
                    End If

                    If (wFile.Position Mod SyncPointInterval) = 0 Then
                        'Resetting the history makes it possible to start reading the file from here
                        'without re-reading the previous data to rebuild the history.
                        Array.Clear(history, 0, history.Length)

                        'The sync point tells the reader where it is in the uncompressed data. Sync
                        'points are at fixed locations, so no escape code is needed.
                        Dim uncompressedPos As Integer = lastConsumed - TapHeaderSize
                        wFile.WriteByte(CByte((uncompressedPos >> 16) And &HFF))    'MSB
                        wFile.WriteByte(CByte((uncompressedPos >> 8) And &HFF))     '.SB
                        wFile.WriteByte(CByte(uncompressedPos And &HFF))            'LSB
                        syncpointcounter += 1
                    End If

                    position = lastConsumed + 1
                End While
            Finally
                pgrBar.Dispose()    'progress bar no longer needed, also when an error occurred
            End Try

            CompressedFilesize = wFile.Length   'the size of the compressed data is the size of the file we just made
        End Using

        CompressionFactor = (((payloadSize - CompressedFilesize) / payloadSize) * 100).ToString("N0") & "%"
        Debug.Print("filename:" & System.IO.Path.GetFileName(filepath) & ", real-skip=" & payloadSize & ", compressed=" & CompressedFilesize.ToString & " (size reduction of " & CompressionFactor & ")")

        'Now that the file is complete we know which sync point is the last one.
        MarkLastSyncPoint(syncpointcounter)
        Return True
    End Function

    ''' <summary>
    ''' Returns the highest byte value (255 down to 0) that does not occur in
    ''' <paramref name="data"/> from <paramref name="startIndex"/> onwards, or -1 when all
    ''' 256 values are in use.
    ''' </summary>
    Private Shared Function FindUnusedByteValue(ByVal data As Byte(), ByVal startIndex As Integer) As Integer
        Dim inUse(255) As Boolean
        For idx As Integer = startIndex To data.Length - 1
            inUse(data(idx)) = True
        Next

        'Search from high to low; any unused value would do.
        For candidate As Integer = 255 To 0 Step -1
            If Not inUse(candidate) Then
                Return candidate
            End If
        Next
        Return -1
    End Function

    ''' <summary>
    ''' Scans the history buffer for the longest run that equals the input starting at
    ''' <paramref name="start"/>.
    ''' </summary>
    ''' <param name="history">History buffer of <c>HistorySize</c> bytes.</param>
    ''' <param name="data">Complete input file.</param>
    ''' <param name="start">Index in <paramref name="data"/> of the first byte to match.</param>
    ''' <param name="bestStart">Receives the history index where the best run begins (0 when none).</param>
    ''' <returns>Length of the best run found (0 when nothing matched).</returns>
    ''' <remarks>
    ''' The scan deliberately mirrors the encoder's established behaviour so the output stays
    ''' byte-compatible: the last history slot is not inspected, a mismatching slot is not
    ''' retried as the start of a new run, and a run is only recorded as "best" when one more
    ''' input byte is available after it.
    ''' </remarks>
    Private Shared Function FindBestMatch(ByVal history As Byte(), ByVal data As Byte(), ByVal start As Integer, ByRef bestStart As Integer) As Integer
        Dim runLength As Integer = 0
        Dim runStart As Integer = 0
        Dim bestLength As Integer = 0
        bestStart = 0

        For slot As Integer = 0 To HistorySize - 2
            'start + runLength never exceeds start + bestLength, which is known to be inside the file.
            If history(slot) = data(start + runLength) Then
                If runLength = 0 Then
                    runStart = slot     'first byte of a (possible) sequence of matches
                End If

                runLength += 1
                If runLength > bestLength Then
                    If start + runLength >= data.Length Then
                        Exit For        'end of input: no byte left to extend the match with
                    End If
                    bestLength = runLength
                    bestStart = runStart
                End If
            Else
                runLength = 0
                runStart = 0
            End If
        Next

        Return bestLength
    End Function

    ''' <summary>
    ''' Overwrites the last sync point of the compressed file with 0xFF,0xFF,0xFF to mark it
    ''' as "last sync point".
    ''' </summary>
    ''' <param name="syncpointcounter">Number of sync points written after the initial one at offset 0.</param>
    Private Sub MarkLastSyncPoint(ByVal syncpointcounter As Integer)
        Using marker As New FileStream(CompressedFilename, FileMode.Open)
            marker.Seek(syncpointcounter * SyncPointInterval, SeekOrigin.Begin)
            marker.WriteByte(&HFF)
            marker.WriteByte(&HFF)
            marker.WriteByte(&HFF)
        End Using
    End Sub

End Class

''== start of run length encoding ===========================================================================================================================
''Run Length Encoding (the easiest method of compression)
''the string: aaabbbbbbbbbdefghjjjjjkllllllmnoopabc
''can be converted into: a*2b*8defghj*4kl*5mnoopabc  (the o is not written as o*1 because that would be 3 bytes of data while oo is only 2 bytes and we are trying to make it smaller)
''because we are working with bytes to indicate how many times the value should be repeated, a series of 500b's would be written not as b*499 but as b*256*242
''the above shows the use of an escape code to indicate how many times a char is repeated
''another escape code could be used to point to repeating patterns of different values which are stored in a codebook

'Const skip As UInteger = 16 'the first ... bytes of a TAP file (header) do not contain data, they must be skipped
'Dim i As Long
'Dim j As Integer
'Dim filesize As Long
'Dim filesize_after As Long
'filesize_after = 0
'Dim cur_val As Byte 'Char
'Dim prev_val As Byte 'Char

''Dim ar As String = "aaabbbbbbbbbdefghjjjjjkllllllmnoopabc"
''Dim ex As String = ""

'Using fs As New MemoryStream(File.ReadAllBytes(filepath)) 'copy file to memory for faster access during processing
'    filesize = fs.Length
'    fs.Position = skip 'skip ... bytes

'    j = 0
'    prev_val = 0 'not_used_value 
'    For i = skip To filesize - 1
'        'For i = 0 To (ar.Length - 1)
'        cur_val = fs.ReadByte 'cur_val = ar(i)
'        If ((prev_val = cur_val) And (j < 255)) Then 'the repeat counter reaches no further then 255
'            j = j + 1
'        Else
'            If (j = 1) Then  'adding an escape code and repeat value in this situation would only make the file larger, we do not want that
'                ' ex = ex & prev_val
'                filesize_after = filesize_after + 1 'previous and current value
'            ElseIf (j > 1) Then 'we are saving bytes by compressing, save escape code and number of repeats to file
'                'ex = ex & "*" & j.ToString
'                filesize_after = filesize_after + 2 'escape code and the repeatvalue
'            End If
'            'save current value to file
'            'ex = ex & cur_val
'            filesize_after = filesize_after + 1
'            j = 0 'reset the repeat counter
'        End If

'        prev_val = cur_val
'    Next i

'    Debug.Print("before=" & filesize & ", after=" & filesize_after)
'    ' Debug.Print("ar=" & ar & ", ex=" & ex)

'End Using
''== end of run length encoding ===========================================================================================================================
