All posts

📝笔记:读写文本常用操作

(Updated: )

近期本人工作中频繁用到跨语言文本读写,本文结合实例介绍使用:

读写,本文结合实例介绍使用读写文件的常规操作,以便后续查阅。

语言:Matlab

操作:读

importdata

data=importdata(data_path);

这种方式适合结构化数据的读取,如euroc_ground_truth.txt文件,其内容如下(节选):

# timestamp,tx,ty,tz,qw,qx,qy,qz
1403636580838555648,4.688319,-1.786938,0.783338,0.534108,-0.153029,-0.827383,-0.082152
1403636580843555328,4.688177,-1.786770,0.787350,0.534640,-0.152990,-0.826976,-0.082863
1403636580848555520,4.688028,-1.786598,0.791382,0.535178,-0.152945,-0.826562,-0.083605
1403636580853555456,4.687878,-1.786421,0.795429,0.535715,-0.152884,-0.826146,-0.084391
1403636580858555648,4.687727,-1.786240,0.799484,0.536244,-0.152821,-0.825731,-0.085213
1403636580863555328,4.687579,-1.786059,0.803540,0.536768,-0.152768,-0.825314,-0.086049
1403636580868555520,4.687435,-1.785881,0.807594,0.537289,-0.152725,-0.824896,-0.086890
1403636580873555456,4.687295,-1.785709,0.811642,0.537804,-0.152680,-0.824481,-0.087725
1403636580878555648,4.687158,-1.785544,0.815682,0.538317,-0.152627,-0.824067,-0.088553
1403636580883555328,4.687025,-1.785390,0.819712,0.538828,-0.152566,-0.823657,-0.089371
1403636580888555520,4.686893,-1.785247,0.823734,0.539337,-0.152496,-0.823250,-0.090170
1403636580893555456,4.686763,-1.785116,0.827749,0.539846,-0.152427,-0.822845,-0.090943

可以通过下述方式导入上述数据:

filename = 'euroc_ground_truth.txt';
delimiterIn = ' ';
headerlinesIn = 1;
A = importdata(filename,delimiterIn,headerlinesIn);

txtread

data_pattern='%f,%f,%f,%f,%f,%f,%f,%f';

[timestamp, tx, ty, tz , qw , qx , qy , qz] = ...
 textread(data_path,data_pattern,'delimiter', '\n','headerlines', 1);

textscan

上面几种方式读取到时间戳被自动转换成double类型,这不符合我们的预期,此时可以进一步指定格式读取数据,因此提到textscan函数。

fid = fopen(data_path);
tline = fgets(fid);    %读取第一行无用数据
data_pattern = '%u64,%f,%f,%f,%f,%f,%f,%f';
data_cell = textscan(fid,data_pattern) ;
fclose(fid);

File操作

SFM得到的模型文件中images.txt的格式如下所示(节选):

# Image list with two lines of data per image:
#   IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME
#   POINTS2D[] as (X, Y, POINT3D_ID)
# Number of images: 31, mean observations per image: 780.80645161290317
1 0.884099 -0.141937 0.435855 0.0908508 0.456173 1.10575 -0.638635 12 IMG_4774.jpg
1664.41 2219.53 -1 3398.7 902.05 -1 2712.06 961.02 -1 1699.48 1238.63 -1 2879.4 2405.39 -1 2660.97 381.236 -1 3644.9 1452.33 -1 1140.93 1660.45 -1 3591.46 2353.79 -1 79.85 2666.42 -1 1140.46 2684.59 -1 2557.4 1692.47 -1 3016.67 1926.3 -1 454.761 2708.41 -1 1485.06 2722.92 -1 1492.38 1858.58 -1 1433.93 1420.58 -1 1669.55 1613.83 -1 2520.72 2673.94 -1 1046 455.571 -1 3259.22 1748.17 7126 2594.15 2494.27 -1 3093.89 876.393 -1 1660.63 1339.07 -1 832.575 1383.51 -1 149.518 2744.39 -1 3315.03 2473.41 7124 2088.65 620.031 -1 2078.78 966.732 -1 1401 1096.82 8134 3865.21 1484.07 -1 860.423 516.002 -1 3769.77 1725.51 -1 2808.02 1954.06 -1 551.916 2168.01 -1 213.281 2411.6 -1 2802.37 1706.15 -1 3677.91 2856.51 -1 874.193 2740.2 -1 858.867 2068.12 -1 3637.86 1157.71 -1 2475.61 2157.57 -1 2966.61 587.535 6881 3887.21 1767.42 -1 3182.04 1737.79 -1 841.659 1499.39 -1 843.027 1267.73 -1 1213.94 956.537 -1 3728.5 796.223 -1 3283.44 2532.15 -1 3835.46 1967.03 2538 328.52 2892.58 -1 734.794 2687.09 -1 714.173 2556.91 -1 1217.6 2491.75 -1 2723.55 2299.31 -1 873.913 2299 -1 3801.61 2217 -1 1276.33 2783.46 -1 3012.51 2168.26 -1 2890.92 2129.36 5648 3829.79 2093.15 -1 3061.59 1762.5 -1 3531.56 2084.99 -1 1246.53 1960.89 -1 3535.97 1792.58 6883 2109.03 934.487 -1 1288.39 2370.23 -1 892.76 2397.14 -1 763.122 2439.22 -1 958.999 2545.59 -1 870.587 2670.47 -1 337.499 2776.81 -1 2822.44 2829.3 -1 1445.31 2906.54 -1 3812.19 2458.6 5695 1454.09 2816.63 -1 1868.71 984.07 -1 3066.34 1223.46 -1 3864.86 1257.88 -1 3947.42 1345.05 -1 1520.25 1409.18 -1 1481.34 1526.1 -1 735.2 2137.66 -1 3127.57 2157.58 6504 687.177 2245.03 -1 559.242 2298.25 7777 1696.43 1351.53 -1 2880.04 1439.3 -1 38.7915 1549.01 -1 2890.66 1548.36 -1 47.8064 1656.89 -1 3620.52 1682.84 1716 3687.85 1720.03 -1 1722.31 1764.86 -1 254.117 2823.77 -1 181.262 2706.08 -1 160.79 2629.34 -1 324.581 2614.24 -1 2903.75 1780.66 -1 664.931 2602.83 -1 2437.42 2599.08 6885 3511.25 2305.26 -1 868.83 2488.58 -1 374.989 2360.49 -1 374.989 2360.49 -1 450.599 2331.43 -1 2801 2217.58 5656 3507.39 2202.53 7127 3204.93 2177.21 -1 2480.14 2100.03 -1 3218.11 2014.01 -1 3031.36 2002.24 5673 841.39 2006.93 -1 2657.5 1917.97 7549 2699.19 1831.71 -1 3401.06 2131.06 7396 1270.71 2606.61 -1 3595.47 1957.88 7474 3837.27 2881.06 -1 1115.83 2528.9 7779 2630.58 1986.87 6888 1955.06 1036.5 -1 2739.36 1913.07 -1 1778.16 1753.93 -1 1507.33 1745.1 -1 54.5862 1603.19 -1 3902.87 1535.15 -1 1612.35 1659.53 -1 2933.7 1483.71 -1 1604.73 1470.23 -1 3521.92 2931.27 6520 3400.6 2903.59 -1 3225.46 2900.27 6889 2857.93 2025.9 -1 2761.73 1404.53 -1 3757.05 2636.56 -1 941.592 2625.63 -1 3273.06 2553.15 -1 1316.81 2531.96 -1 2448.55 2008.76 -1 247.038 2505 7149 1603.3 1359.87 -1 528.52 2494.4 -1 87.2797 2497.3 -1 310.672 2465.84 6890 371.206 2431.08 -1

此时文本格式并不规则,此时我们采用fopen+fgets+strsplit对数据进行读取,代码如下:

images = read_images(images.txt);

% 定义读取SFM图像文本函数
function images = read_images(path)
    images = containers.Map('KeyType', 'int64', 'ValueType', 'any');
    fid = fopen(path);
    tline = fgets(fid);    %读取一行数据
    while ischar(tline)  %判断是否是文件结尾
        elems = strsplit(tline,' '); %分割以' '为间隔的字符,返回元胞数组
        if numel(elems) < 4 || strcmp(elems(1), '#') %略过非数据行
            tline = fgets(fid);
            continue
        end
        if mod(images.Count, 10) == 0 %打印读取信息
            fprintf('Reading image %d\n', images.length);
        end
        image = struct;
        image.image_id = str2num(elems{1});
        qw = str2double(elems{2});
        qx = str2double(elems{3});
        qy = str2double(elems{4});
        qz = str2double(elems{5});
        image.R = quat2rotmat([qw, qx, qy, qz]);
        tx = str2double(elems{6});
        ty = str2double(elems{7});
        tz = str2double(elems{8});
        image.t = [tx; ty; tz];
        image.camera_id = str2num(elems{9});
        image.name = elems{10};

        tline = fgets(fid);
        elems = sscanf(tline, '%f'); % 读取图像特点
        elems = reshape(elems, [3, numel(elems) / 3]);
        image.xys = elems(1:2,:)';
        image.point3D_ids = elems(3,:)';
        images(image.image_id) = image;
        tline = fgets(fid);
    end
    fclose(fid);
end
%四元数转旋转矩阵
function rotmat = quat2rotmat(qvec)
    rotmat = [1 - 2 * qvec(3).^2 - 2 * qvec(4).^2, ...
            2 * qvec(2) * qvec(3) - 2 * qvec(1) * qvec(4), ...
            2 * qvec(4) * qvec(2) + 2 * qvec(1) * qvec(3); ...

            2 * qvec(2) * qvec(3) + 2 * qvec(1) * qvec(4), ...
            1 - 2 * qvec(2).^2 - 2 * qvec(4).^2, ...
            2 * qvec(3) * qvec(4) - 2 * qvec(1) * qvec(2); ...

            2 * qvec(4) * qvec(2) - 2 * qvec(1) * qvec(3), ...
            2 * qvec(3) * qvec(4) + 2 * qvec(1) * qvec(2), ...
            1 - 2 * qvec(2).^2 - 2 * qvec(3).^2];
end

操作:写

File操作

下面的write_ply.m是写点云数据的方法,主要使用fprintf

function write_ply(path, xyz, normals, rgb)
    % Write point cloud to PLY text file.
    file = fopen(path, 'W');
    fprintf(file,'ply\n');
    fprintf(file,'format ascii 1.0\n');
    fprintf(file,'element vertex %d\n',size(xyz,1));
    fprintf(file,'property float x\n');
    fprintf(file,'property float y\n');
    fprintf(file,'property float z\n');
    fprintf(file,'property float nx\n');
    fprintf(file,'property float ny\n');
    fprintf(file,'property float nz\n');
    fprintf(file,'property uchar diffuse_red\n');
    fprintf(file,'property uchar diffuse_green\n');
    fprintf(file,'property uchar diffuse_blue\n');
    fprintf(file,'end_header\n');
    for i = 1:size(xyz, 1)
        fprintf(file, '%f %f %f %f %f %f %d %d %d\n', ...
            xyz(i,1), xyz(i,2), xyz(i,3), ...
            normals(i,1), normals(i,2), normals(i,3), ...
            uint8(rgb(i,1)), uint8(rgb(i,2)), uint8(rgb(i,3)));
    end
    fclose(file);
end

语言:c++

读的难点在于读取一行数据并实现字符的切分,可采用stringstream或者getline对字符切割。

操作:读

下图是文件读写用到的函数继承关系以及对应的头文件,具体操作可参考这篇文章

fstream类中,成员函数open(file_path,flag)实现打开文件的操作,从而将数据流和文件进行关联,作为传入参数的文件打开模式标记(flag)可参看这个文章

模式标记

适用对象

作用

ios::in

ifstream fstream

打开文件用于读取数据。如果文件不存在,则打开出错。

ios::out

ofstream fstream

打开文件用于写入数据。如果文件不存在,则新建该文件;如果文件原来就存在,则打开时清除原来的内容。

ios::app

ofstream fstream

打开文件,用于在其尾部添加数据。如果文件不存在,则新建该文件。

ios::ate

ifstream

打开一个已有的文件,并将文件读指针指向文件末尾(读写指 的概念后面解释)。如果文件不存在,则打开出错。

ios:: trunc

ofstream

打开文件时会清空内部存储的所有数据,单独使用时与 ios::out 相同。

ios::binary

ifstream ofstream fstream

以二进制方式打开文件。若不指定此模式,则以文本模式打开。

ios::in | ios::out

fstream

打开已存在的文件,既可读取其内容,也可向其写入数据。文件刚打开时,原有内容保持不变。如果文件不存在,则打开出错。

ios::in | ios::out

ofstream

打开已存在的文件,可以向其写入数据。文件刚打开时,原有内容保持不变。如果文件不存在,则打开出错。

ios::in | ios::out | ios::trunc

fstream

打开文件,既可读取其内容,也可向其写入数据。如果文件本来就存在,则打开时清除原来的内容;如果文件不存在,则新建该文件。

// 使用stringstream分割字符,注意引入头文件#include <sstream>
void read_images_text(const string& path)
{
    std::ifstream file(path);//文件到流缓冲,即内存
    std::string line;
    std::string item;
    long double qw, qx, qy, qz, tx, ty, tz;
    unsigned long camera_id;
    string image_name;
    while (std::getline(file, line))
    {
        if (line.empty() || line[0] == '#') {
            continue;
        }
        std::stringstream line_stream1(line);
        line_stream1 >> item; qw = std::stold(item);
        line_stream1 >> item; qx = std::stold(item);
        line_stream1 >> item; qy = std::stold(item);
        line_stream1 >> item; qz = std::stold(item);
        line_stream1 >> item; tx = std::stold(item);
        line_stream1 >> item; ty = std::stold(item);
        line_stream1 >> item; tz = std::stold(item);
        line_stream1 >> item; qw = std::stold(item);
        line_stream1 >> item; camera_id = std::stoul(item);
        line_stream1 >> item; image_name = item;

        // POINTS2D
        if (!std::getline(file, line)) {
            break;
        }
        std::cout <<"processing : " << image_name << std::endl;

        std::stringstream line_stream2(line);
        long double x,y;
        long long point3D_id;
        if (!line.empty())
        {
            while (!line_stream2.eof())
            {
                line_stream2 >> item; x = std::stold(item);
                line_stream2 >> item; y = std::stold(item);
                line_stream2 >> item; point3D_id = std::stoll(item);
                // cout <<x <<" " << y << " " << point3D_id<<" ";
            }
        }
    }
    file.close();
}
// 使用getline分割字符,需指定间隔字符
void read_images_text(const string& path)
{
    std::ifstream file(path);
    std::string line;
    std::string item;
    while (std::getline(file, line)) {
        StringTrim(&line);
        if (line.empty() || line[0] == '#') continue;
        std::stringstream line_stream1(line);
        // ID
        std::getline(line_stream1, item, ' ');
        // QVEC (qw, qx, qy, qz)
        std::getline(line_stream1, item, ' ');long double qw = std::stold(item);
        std::getline(line_stream1, item, ' ');long double qx = std::stold(item);
        std::getline(line_stream1, item, ' ');long double qy = std::stold(item);
        std::getline(line_stream1, item, ' ');long double qz = std::stold(item);
        // TVEC
        std::getline(line_stream1, item, ' ');long double tx = std::stold(item);
        std::getline(line_stream1, item, ' ');long double ty = std::stold(item);
        std::getline(line_stream1, item, ' ');long double tz = std::stold(item);
        // CAMERA_ID
        std::getline(line_stream1, item, ' ');unsigned long camera_id = std::stoul(item);
        // NAME
        std::getline(line_stream1, item, ' ');std::string image_name = item;
        // POINTS2D
        if (!std::getline(file, line)) break;
        StringTrim(&line);
        std::stringstream line_stream2(line);
        if (!line.empty())
        {
            while (!line_stream2.eof())
            {
                std::getline(line_stream2, item, ' ');double x = std::stold(item);
                std::getline(line_stream2, item, ' ');double y = std::stold(item);
                std::getline(line_stream2, item, ' ');
                long long point3D_id;
                if (item == "-1") {
                    std::cout <<"2D point has no 3D points" <<std::endl;
                    point3D_id = -1;
                }
                else {
                    std::cout <<"2D point ok!!!" <<std::endl;
                    point3D_id = std::stoll(item);
                }
            }
        }
    }
    file.close();
}

操作:写

写文件比读文件容易得多,此处可用ofstream进行写入文件。

// 此处给出colmap中WriteImagesText函数
void Reconstruction::WriteImagesText(const std::string& path) const {
  std::ofstream file(path, std::ios::trunc);
  CHECK(file.is_open()) << path;

  // Ensure that we don't loose any precision by storing in text.
  file.precision(17);

  file << "# Image list with two lines of data per image:" << std::endl;
  file << "#   IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, "
          "NAME"
       << std::endl;
  file << "#   POINTS2D[] as (X, Y, POINT3D_ID)" << std::endl;
  file << "# Number of images: " << reg_image_ids_.size()
       << ", mean observations per image: "
       << ComputeMeanObservationsPerRegImage() << std::endl;

  for (const auto& image : images_) {
    if (!image.second.IsRegistered()) {
      continue;
    }

    std::ostringstream line; //接收一行数据,通过'<<'接收
    std::string line_string;

    line << image.first << " ";

    // QVEC (qw, qx, qy, qz)
    const Eigen::Vector4d normalized_qvec =
        NormalizeQuaternion(image.second.Qvec());
    line << normalized_qvec(0) << " ";
    line << normalized_qvec(1) << " ";
    line << normalized_qvec(2) << " ";
    line << normalized_qvec(3) << " ";

    // TVEC
    line << image.second.Tvec(0) << " ";
    line << image.second.Tvec(1) << " ";
    line << image.second.Tvec(2) << " ";
    line << image.second.CameraId() << " ";
    line << image.second.Name();
    file << line.str() << std::endl;
    line.str("");
    line.clear();

    for (const Point2D& point2D : image.second.Points2D()) {
      line << point2D.X() << " ";
      line << point2D.Y() << " ";
      if (point2D.HasPoint3D()) {
        line << point2D.Point3DId() << " ";
      } else {
        line << -1 << " ";
      }
    }
    line_string = line.str();
    line_string = line_string.substr(0, line_string.size() - 1);
    file << line_string << std::endl;
  }
}

补充:读写数据时可能会涉及字符串与数字的转换,如下给出二者转换的方法。

//string to number
template <typename Type>
Type tonumber(string str)
{
    std::stringstream ss;
    ss << str;
    Type ret = 0;
    ss >> ret;
    return ret;
}
//number to str
template <typename Type>
string tostring(Type number)
{
    std::stringstream ss;
    ss << number;
    string ret;
    ss >> ret;
    return ret;
}
// 调用时:
string str = "1403636580838555648";
uint64_t number = 1403636580838555648;
num_convert = tonumber<uint64_t >(str);
str_convert = tostring<uint64_t >(number);

语言:Python

操作:读

def read_images_text(path):
    images = {}
    with open(path, "r") as fid:
        while True:
            line = fid.readline()#读取一行数据
            if not line:
                break
            line = line.strip() #移除字符串头尾指定的字符(默认为空格或换行符)或字符序列
            if len(line) > 0 and line[0] != "#":
                elems = line.split()  #分割字符
                image_id = int(elems[0])
                qvec = np.array(tuple(map(float, elems[1:5])))
                tvec = np.array(tuple(map(float, elems[5:8])))
                camera_id = int(elems[8])
                image_name = elems[9]
                elems = fid.readline().split()
                xys = np.column_stack([tuple(map(float, elems[0::3])),
                                       tuple(map(float, elems[1::3]))])
                point3D_ids = np.array(tuple(map(int, elems[2::3])))
                images[image_id] = Image(
                    id=image_id, qvec=qvec, tvec=tvec,
                    camera_id=camera_id, name=image_name,
                    xys=xys, point3D_ids=point3D_ids)
    return images

操作:写

使用File(文件) open(file, mode='r') 方法,如下面写SFM matchs-pair.txt的方法:

pairs = []
for query, indices in zip(query_names, topk):
    for i in indices:
        pair = (query, db_names[i])
        pairs.append(pair)

logging.info(f'Found {len(pairs)} pairs.')
with open(output, 'w') as f:
    f.write('\n'.join(' '.join([i, j]) for i, j in pairs))

以及写SFM模型images.txt的方法都可值得参考。

# 写
def write_images_text(images, path):
    if len(images) == 0:
        mean_observations = 0
    else:
        mean_observations = sum((len(img.point3D_ids) \
         for _, img in images.items()))/len(images)
    HEADER = "# Image list with two lines of data per image:\n"
    "#   IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n"
    "#   POINTS2D[] as (X, Y, POINT3D_ID)\n"
    "# Number of images: {}, mean observations per image: {}\n"\
    .format(len(images),mean_observations)

    with open(path, "w") as fid:
        fid.write(HEADER)
        for _, img in images.items():
            #写图像ID/位姿/相机ID/图像名
            image_header = [img.id, *img.qvec, *img.tvec, img.camera_id, img.name]
            first_line = " ".join(map(str, image_header))
            fid.write(first_line + "\n")
            #写2D点
            points_strings = []
            for xy, point3D_id in zip(img.xys, img.point3D_ids):
                points_strings.append(" ".join(map(str, [*xy, point3D_id])))
            fid.write(" ".join(points_strings) + "\n")