Skip to content

TDX数据解析工具

在通达信软件的安装目录下,可以找到一个名为vipdoc的文件夹,里面存放着各个股票的分时、日线、周线、月线等行情数据文件。这些文件都是二进制文件,需要通过特定的解析算法才能将其转换为可读的文本格式。

简介

TDX数据解析工具是一个用于处理通达信软件数据文件的Python工具包。主要用于读取和分析通达信的日线数据(.day)和分钟数据(.lc1)文件。

数据格式说明

  • 支持读取通达信日线数据文件(.day)。文件名即股票代码,每32个字节为一个数据,其中每4个字节为一个字段,每个字段内低字节在前。
txt

00 ~ 03 字节:年月日, 整型

04 ~ 07 字节:开盘价*1000, 整型

08 ~ 11 字节:最高价*1000,  整型

12 ~ 15 字节:最低价*1000,  整型

16 ~ 19 字节:收盘价*1000,  整型

20 ~ 23 字节:成交额(元),float型

24 ~ 27 字节:成交量(手),整型

28 ~ 31 字节:上日收盘*1000, 整型
  • 支持读取通达信1分钟数据文件(.lc1),5分钟数据文件(.lc5),15分钟数据文件(.lc15),30分钟数据文件(.lc30),60分钟数据文件(.lc60)。文件名即股票代码,每32个字节为一个数据,其中每4个字节为一个字段,每个字段内低字节在前。
txt
00 ~ 01 字节:日期,整型,设其值为num,则日期计算方法为:
      year=floor(num/2048)+2004;
      month=floor(mod(num,2048)/100);
      day=mod(mod(num,2048),100);

02 ~ 03 字节:从0点开始至目前的分钟数,整型

04 ~ 07 字节:开盘价(分),整型

08 ~ 11 字节:最高价(分),整型

12 ~ 15 字节:最低价(分),整型

16 ~ 19 字节:收盘价(分),整型

20 ~ 23 字节:成交额(元),float型

24 ~ 27 字节:成交量(股)

28 ~ 31 字节:保留,一般都是0

Python代码示例

日线数据读取

python
import os
import struct

def read_day_data(file_path):
    """Read a TDX daily-bar file (``.day``) into a pandas DataFrame.

    The file is a sequence of 32-byte little-endian records:
    five unsigned 32-bit integers (date, open, high, low, close), one
    32-bit float (amount), and two more unsigned 32-bit integers (volume
    and a trailing field that is not returned).

    Args:
        file_path: Path of the ``.day`` file to read.

    Returns:
        A DataFrame with the columns ``date`` (integer exactly as stored),
        ``open``, ``high``, ``low``, ``close`` (stored integer divided by
        100.0), ``amount`` and ``volume``. An empty or too-short file yields
        an empty DataFrame that still carries these columns. Trailing bytes
        that do not form a complete record are ignored.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Imported locally because the surrounding snippet only imports os/struct.
    import pandas as pd

    # '<' pins little-endian: the on-disk layout must not follow the host CPU.
    record = struct.Struct('<IIIIIfII')
    columns = ['date', 'open', 'high', 'low', 'close', 'amount', 'volume']

    with open(file_path, 'rb') as f:
        buf = f.read()

    rows = []
    # Step through complete records only; a partial tail is skipped.
    for offset in range(0, len(buf) - record.size + 1, record.size):
        date, open_, high, low, close, amount, volume, _ = record.unpack_from(buf, offset)
        rows.append({
            'date': date,
            # NOTE(review): prices are scaled by 1/100 as in the original code;
            # confirm the scale factor for the instrument type being read.
            'open': open_ / 100.0,
            'high': high / 100.0,
            'low': low / 100.0,
            'close': close / 100.0,
            'amount': amount,
            'volume': volume,
        })

    return pd.DataFrame(rows, columns=columns)

分钟数据读取

python
import os
import struct

def read_minute_data(file_path):
    """Read a TDX minute-bar file (``.lc1``, ``.lc5``, ...) into a DataFrame.

    The file is a sequence of 32-byte little-endian records: two unsigned
    16-bit integers (packed date, minutes since midnight), four unsigned
    32-bit integers (open, high, low, close), one 32-bit float (amount) and
    two unsigned 32-bit integers (volume and a trailing field that is not
    returned).

    The packed date ``num`` is decoded as::

        year  = num // 2048 + 2004
        month = (num % 2048) // 100
        day   = (num % 2048) % 100

    Args:
        file_path: Path of the minute-bar file to read.

    Returns:
        A DataFrame with the columns ``time`` (timestamp built from the
        packed date plus the minute offset), ``open``, ``high``, ``low``,
        ``close`` (stored integer divided by 100.0), ``amount`` and
        ``volume``. An empty or too-short file yields an empty DataFrame that
        still carries these columns. Trailing bytes that do not form a
        complete record are ignored.

    Raises:
        OSError: If the file cannot be opened or read.
        ValueError: If a record's packed date does not decode to a valid
            calendar date.
    """
    # Imported locally because the surrounding snippet only imports os/struct.
    import datetime
    import pandas as pd

    # '<' pins little-endian; 'HH' splits bytes 0-3 into date code and minutes.
    record = struct.Struct('<HHIIIIfII')
    columns = ['time', 'open', 'high', 'low', 'close', 'amount', 'volume']

    with open(file_path, 'rb') as f:
        buf = f.read()

    rows = []
    # Step through complete records only; a partial tail is skipped.
    for offset in range(0, len(buf) - record.size + 1, record.size):
        (day_code, minute_of_day, open_, high, low, close,
         amount, volume, _) = record.unpack_from(buf, offset)

        year = day_code // 2048 + 2004
        month, day = divmod(day_code % 2048, 100)
        try:
            stamp = (datetime.datetime(year, month, day)
                     + datetime.timedelta(minutes=minute_of_day))
        except ValueError as exc:
            raise ValueError(
                'invalid date code %d in record at byte offset %d'
                % (day_code, offset)) from exc

        rows.append({
            'time': stamp,
            'open': open_ / 100.0,
            'high': high / 100.0,
            'low': low / 100.0,
            'close': close / 100.0,
            'amount': amount,
            'volume': volume,
        })

    return pd.DataFrame(rows, columns=columns)

使用示例

python
# Read the daily bars from a .day file and preview the first rows
# (read_day_data returns a pandas DataFrame).
day_data = read_day_data('path/to/stock.day')
print(day_data.head())

# Read the 1-minute bars from a .lc1 file and preview the first rows
# (read_minute_data returns a pandas DataFrame).
minute_data = read_minute_data('path/to/stock.lc1')
print(minute_data.head())

Java示例

以下是一个简单的Java程序,用于读取通达信日线数据文件(.day)和分钟数据文件(.lc1)。Java读取二进制数据后,需要参考Python的struct模块来解析二进制内容;幸运的是,已有开源项目JStruct实现了该功能。

java
/**
 *
 * JStruct:  The python struct library's port to java for reading and writing binary data as in python —  Copyright (C) 2016 Sohan Basak
 * This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 3 of the license
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
 * See the GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License along with this program. 
 * if not, see http://www.gnu.org/licenses/.
 *
 */

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.lang.*;
import java.nio.ByteOrder;
import java.util.Arrays;

import static java.lang.Math.*;

/**
 * Minimal port of Python's {@code struct} module for 16- and 32-bit integers.
 *
 * <p>Supported value specifiers: {@code h} (signed 16-bit), {@code H} (unsigned 16-bit),
 * {@code i} (signed 32-bit) and {@code I} (unsigned 32-bit). A format string may start with
 * one byte-order marker: {@code <} little-endian, {@code >} or {@code !} big-endian,
 * {@code @} or {@code =} native order. A format without a marker uses native order.
 *
 * <p>Values travel as {@code long} so that unsigned 32-bit numbers fit. When packing, a value
 * is truncated to the low 16 or 32 bits of its two's-complement representation.
 *
 * <p>Instances are not thread-safe: the byte order selected by the most recent
 * {@code pack}/{@code unpack} call is kept in an instance field, which is also what
 * {@link #pack_single_data(char, long)} and {@link #unpack_single_data(char, byte[])} use.
 */
public class Struct {
    private static final short BIG_ENDIAN = 0;
    private static final short LITTLE_ENDIAN = 1;

    /** Byte order of the host platform, captured at construction time. */
    private final short nativeByteOrder;

    /** Byte order applied by the encode/decode helpers; reset by every pack/unpack call. */
    private short byteOrder;

    /** Creates a codec whose initial byte order is the platform's native order. */
    public Struct() {
        nativeByteOrder =
                (ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN) ? LITTLE_ENDIAN : BIG_ENDIAN;
        byteOrder = nativeByteOrder;
    }

    /** Returns 1 when {@code fmt} starts with a byte-order marker, otherwise 0. */
    private static int prefixLength(String fmt) {
        if (fmt.isEmpty()) {
            return 0;
        }
        switch (fmt.charAt(0)) {
            case '@':
            case '=':
            case '<':
            case '>':
            case '!':
                return 1;
            default:
                return 0;
        }
    }

    /**
     * Selects the byte order requested by {@code fmt}. A format without a marker falls back to
     * native order so that a marker used in an earlier call does not leak into this one.
     */
    private void applyByteOrder(String fmt) {
        char marker = (prefixLength(fmt) == 1) ? fmt.charAt(0) : '@';
        switch (marker) {
            case '<':
                byteOrder = LITTLE_ENDIAN;
                break;
            case '>':
            case '!':
                byteOrder = BIG_ENDIAN;
                break;
            default:
                byteOrder = nativeByteOrder;
                break;
        }
    }

    /** Returns the encoded width in bytes of a value specifier, or 0 if it is not supported. */
    private static int sizeOf(char spec) {
        switch (spec) {
            case 'h':
            case 'H':
                return 2;
            case 'i':
            case 'I':
                return 4;
            default:
                return 0;
        }
    }

    /** Returns whether the specifier denotes a signed (two's-complement) value. */
    private static boolean isSigned(char spec) {
        return spec == 'h' || spec == 'i';
    }

    /** Writes the low {@code size} bytes of {@code value} in the current byte order. */
    private byte[] encode(long value, int size) {
        byte[] out = new byte[size];
        for (int i = 0; i < size; i++) {
            // Position i holds the most significant byte first when big-endian.
            int significance = (byteOrder == BIG_ENDIAN) ? size - 1 - i : i;
            out[i] = (byte) (value >>> (8 * significance));
        }
        return out;
    }

    /**
     * Reads {@code size} bytes starting at {@code offset} in the current byte order.
     * The source array is never modified.
     */
    private long decode(byte[] src, int offset, int size, boolean signed) {
        long acc = 0;
        for (int i = 0; i < size; i++) {
            int index = (byteOrder == BIG_ENDIAN) ? offset + i : offset + size - 1 - i;
            // Mask with a long so that a negative byte is not sign-extended into the result.
            acc = (acc << 8) | (src[index] & 0xffL);
        }
        if (signed) {
            // Shift the value's sign bit up to bit 63 and back to sign-extend it.
            int spare = Long.SIZE - 8 * size;
            acc = (acc << spare) >> spare;
        }
        return acc;
    }

    /**
     * Encodes one value using the byte order left by the most recent pack/unpack call
     * (native order on a fresh instance).
     *
     * @param fmt value specifier: {@code h}, {@code H}, {@code i} or {@code I}
     * @param val value to encode; truncated to the specifier's width
     * @return the encoded bytes, or {@code null} after printing a message to standard output
     *     when {@code fmt} is not a supported specifier
     */
    public byte[] pack_single_data(char fmt, long val) {
        int size = sizeOf(fmt);
        if (size == 0) {
            System.out.println("Invalid format specifier");
            return null;
        }
        return encode(val, size);
    }

    /**
     * Packs a single value.
     *
     * @param fmt optional byte-order marker followed by exactly one value specifier
     * @param val value to encode; truncated to the specifier's width
     * @return the encoded bytes (2 or 4 of them)
     * @throws Exception ({@link IllegalArgumentException}) if the format does not contain
     *     exactly one supported value specifier
     */
    public byte[] pack(String fmt, long val) throws Exception {
        int start = prefixLength(fmt);
        int specifiers = fmt.length() - start;
        if (specifiers > 1) {
            throw new IllegalArgumentException(
                    "Single values may not have multiple format specifiers");
        }
        if (specifiers == 0) {
            throw new IllegalArgumentException("Format has no value specifier: \"" + fmt + "\"");
        }

        applyByteOrder(fmt);
        byte[] packed = pack_single_data(fmt.charAt(start), val);
        if (packed == null) {
            throw new IllegalArgumentException("Invalid character specifier");
        }
        return packed;
    }

    /**
     * Packs several values back to back.
     *
     * @param fmt optional byte-order marker followed by one value specifier per element of
     *     {@code vals}; an empty format with an empty array yields an empty result
     * @param vals values to encode, in format order
     * @return the concatenated encoded bytes
     * @throws Exception ({@link IllegalArgumentException}) if the number of specifiers differs
     *     from {@code vals.length} or a specifier is not supported
     */
    public byte[] pack(String fmt, long[] vals) throws Exception {
        int start = prefixLength(fmt);
        int count = fmt.length() - start;
        if (count != vals.length) {
            throw new IllegalArgumentException("format length and values aren't equal");
        }

        applyByteOrder(fmt);
        // lenEst ignores unsupported characters; those are rejected inside the loop below.
        byte[] out = new byte[lenEst(fmt)];
        int offset = 0;
        for (int i = 0; i < count; i++) {
            byte[] piece = pack_single_data(fmt.charAt(start + i), vals[i]);
            if (piece == null) {
                throw new IllegalArgumentException("Invalid character specifier");
            }
            System.arraycopy(piece, 0, out, offset, piece.length);
            offset += piece.length;
        }
        return out;
    }

    /**
     * Decodes one value using the byte order left by the most recent pack/unpack call
     * (native order on a fresh instance). The input array is not modified.
     *
     * @param fmt value specifier: {@code h}, {@code H}, {@code i} or {@code I}
     * @param val exactly 2 bytes for {@code h}/{@code H}, exactly 4 for {@code i}/{@code I}
     * @return the decoded value, or 0 when {@code fmt} is not a supported specifier
     * @throws Exception ({@link IllegalArgumentException}) if {@code val} has the wrong length
     */
    public long unpack_single_data(char fmt, byte[] val) throws Exception {
        int size = sizeOf(fmt);
        if (size == 0) {
            // Unsupported specifiers decode to 0, matching the method's historical behaviour.
            return 0;
        }
        if (val.length != size) {
            throw new IllegalArgumentException("Byte length mismatch");
        }
        return decode(val, 0, size, isSigned(fmt));
    }

    /** Returns the total encoded size in bytes of all supported specifiers in {@code fmt}. */
    private int lenEst(String fmt) {
        int total = 0;
        for (int i = 0; i < fmt.length(); i++) {
            total += sizeOf(fmt.charAt(i));
        }
        return total;
    }

    /**
     * Unpacks a byte array into one {@code long} per value specifier.
     *
     * @param fmt optional byte-order marker followed by value specifiers; an empty format with
     *     an empty array yields an empty result
     * @param vals encoded bytes; the length must equal the total size described by {@code fmt}
     * @return the decoded values in format order; {@code vals} is not modified
     * @throws Exception ({@link IllegalArgumentException}) if the length of {@code vals} does
     *     not match the format or a specifier is not supported
     */
    public long[] unpack(String fmt, byte[] vals) throws Exception {
        if (lenEst(fmt) != vals.length) {
            throw new IllegalArgumentException("format length and values aren't equal");
        }

        int start = prefixLength(fmt);
        int count = fmt.length() - start;
        // Reject unsupported specifiers up front instead of returning silent zero slots.
        for (int i = 0; i < count; i++) {
            if (sizeOf(fmt.charAt(start + i)) == 0) {
                throw new IllegalArgumentException("Invalid character specifier");
            }
        }

        applyByteOrder(fmt);
        long[] out = new long[count];
        int offset = 0;
        for (int i = 0; i < count; i++) {
            char spec = fmt.charAt(start + i);
            int size = sizeOf(spec);
            out[i] = decode(vals, offset, size, isSigned(spec));
            offset += size;
        }
        return out;
    }

}
最近更新