Commit 548a0acf authored by Akarsh Simha's avatar Akarsh Simha
Browse files

Drastically speed up binary file -> SQLite database generation

Using prepared queries and committing a transaction for each 100
trixels processed.

After this change, on my laptop (Intel Core i5 dual core @ 2.5 GHz, 8
GB of RAM), the db generation took about 45 minutes for USNO NOMAD 100
million stars, and the resulting SQLite DB size is 14 GB (i.e. 10x
bloat factor)
parent 39b1e996
......@@ -32,7 +32,7 @@ nomadbinfile2sqlite.cpp -- Convert USNO NOMAD binary file to a SQLite database
#include <iostream>
#include <string.h>
#include <stdio.h>
#define DEBUG true
#define DEBUG false
using namespace std;
......@@ -176,17 +176,44 @@ bool NOMADStarDataWriter::insertStarData( unsigned int trixel, const DeepStarDat
return false;
}
sprintf( query, "INSERT INTO `%s` (`Trixel`, `RA`, `Dec`, `dRA`, `dDec`, `B`, `V`, `mag`, `PM`, `Copies`) VALUES (:Trixel, :RA, :Dec, :dRA, :dDec, :B, :V, :mag, :PM, :Copies)", db_tbl );
sqlite3_stmt *stmt;
sqlite3_prepare_v2( db, query, -1, &stmt, 0 );
if ( sqlite3_exec( db, "BEGIN TRANSACTION", 0, 0, &errorMessage ) != SQLITE_OK ) {
cerr << "SQLite INSERT INTO failed! Query was: " << endl << query << endl;
cerr << "Error was: " << errorMessage << endl;
sqlite3_free( errorMessage );
return false;
}
for( int i = 0; i < ntrixels; ++i ) {
sprintf( query, "INSERT INTO `%s` (`Trixel`, `RA`, `Dec`, `dRA`, `dDec`, `B`, `V`, `mag`, `PM`, `Copies`) VALUES (\'%d\', \'%lf\', \'%lf\', \'%lf\', \'%lf\', \'%lf\', \'%lf\', \'%lf\', \'%lf\', \'%u\')", db_tbl, TrixelList[ i ], RA, Dec, dRA, dDec, B, V, mag, PM,
( (TrixelList[ i ] == originalTrixelID) ? ntrixels : 0 ) // Duplicates get a 'Copies' value of 0. The real star gets the actual value.
);
if( sqlite3_exec( db, query, 0, 0, &errorMessage ) != SQLITE_OK ) {
cerr << "SQLite INSERT INTO failed! Query was: " << endl << query << endl;
cerr << "Error was: " << errorMessage << endl;
sqlite3_free( errorMessage );
return false;
}
sqlite3_bind_int( stmt, 1, TrixelList[ i ] );
sqlite3_bind_double( stmt, 2, RA );
sqlite3_bind_double( stmt, 3, Dec );
sqlite3_bind_double( stmt, 4, dRA );
sqlite3_bind_double( stmt, 5, dDec );
sqlite3_bind_double( stmt, 6, B );
sqlite3_bind_double( stmt, 7, V );
sqlite3_bind_double( stmt, 8, mag );
sqlite3_bind_double( stmt, 9, PM );
sqlite3_bind_int( stmt, 10, ( (TrixelList[ i ] == originalTrixelID) ? ntrixels : 0 ) );
sqlite3_step( stmt );
sqlite3_clear_bindings( stmt );
sqlite3_reset( stmt );
}
if ( sqlite3_exec( db, "END TRANSACTION", 0, 0, &errorMessage ) != SQLITE_OK ) {
cerr << "SQLite INSERT INTO failed! Query was: " << endl << query << endl;
cerr << "Error was: " << errorMessage << endl;
sqlite3_free( errorMessage );
return false;
}
sqlite3_finalize( stmt );
return true;
}
......@@ -229,6 +256,19 @@ bool NOMADStarDataWriter::writeStarDataToDB() {
return false;
}
*/
char *errorMessage = 0;
char query[2048];
sprintf( query, "INSERT INTO `%s` (`Trixel`, `RA`, `Dec`, `dRA`, `dDec`, `B`, `V`, `mag`, `PM`, `Copies`) VALUES (:Trixel, :RA, :Dec, :dRA, :dDec, :B, :V, :mag, :PM, :Copies)", db_tbl );
sqlite3_stmt *stmt;
sqlite3_prepare_v2( db, query, -1, &stmt, 0 );
if ( sqlite3_exec( db, "BEGIN TRANSACTION", 0, 0, &errorMessage ) != SQLITE_OK ) {
cerr << "SQLite BEGIN TRANSACTION failed! Query was: " << endl << query << endl;
cerr << "Error was: " << errorMessage << endl;
sqlite3_free( errorMessage );
return false;
}
for( trixel = 0; trixel < ntrixels; ++trixel ) {
fseek( DataFile, m_IndexOffset + trixel * INDEX_ENTRY_SIZE + 4 , SEEK_SET );
......@@ -252,7 +292,6 @@ bool NOMADStarDataWriter::writeStarDataToDB() {
/** CODE FROM INSERTSTARDATA PASTED HERE FOR SPEED */
{
char query[2048];
float mag;
float B, V, RA, Dec, dRA, dDec;
......@@ -317,23 +356,53 @@ bool NOMADStarDataWriter::writeStarDataToDB() {
}
for( int i = 0; i < nt; ++i ) {
sprintf( query, "INSERT INTO `%s` (`Trixel`, `RA`, `Dec`, `dRA`, `dDec`, `B`, `V`, `mag`, `PM`, `Copies`) VALUES (\'%d\', \'%lf\', \'%lf\', \'%lf\', \'%lf\', \'%lf\', \'%lf\', \'%lf\', \'%lf\', \'%u\')", db_tbl, TrixelList[ i ], RA, Dec, dRA, dDec, B, V, mag, PM,
( (TrixelList[ i ] == originalTrixelID) ? nt : 0 ) // Duplicates get a 'Copies' value of 0. The real star gets the actual value.
);
char *errorMessage = 0;
if( sqlite3_exec( db, query, 0, 0, &errorMessage ) != SQLITE_OK ) {
cerr << "SQLite INSERT INTO failed! Query was: " << endl << query << endl;
cerr << "Error was: " << errorMessage << endl;
sqlite3_free( errorMessage );
return false;
}
sqlite3_bind_int( stmt, 1, TrixelList[ i ] );
sqlite3_bind_double( stmt, 2, RA );
sqlite3_bind_double( stmt, 3, Dec );
sqlite3_bind_double( stmt, 4, dRA );
sqlite3_bind_double( stmt, 5, dDec );
sqlite3_bind_double( stmt, 6, B );
sqlite3_bind_double( stmt, 7, V );
sqlite3_bind_double( stmt, 8, mag );
sqlite3_bind_double( stmt, 9, PM );
sqlite3_bind_int( stmt, 10, ( (TrixelList[ i ] == originalTrixelID) ? ntrixels : 0 ) );
sqlite3_step( stmt );
sqlite3_clear_bindings( stmt );
sqlite3_reset( stmt );
}
}
}
if( trixel % 100 == 0 )
if( trixel % 100 == 0 && trixel != 0 ) {
if ( sqlite3_exec( db, "END TRANSACTION", 0, 0, &errorMessage ) != SQLITE_OK ) {
cerr << "SQLite END TRANSACTION failed! Query was: " << endl << query << endl;
cerr << "Error was: " << errorMessage << endl;
sqlite3_free( errorMessage );
return false;
}
sqlite3_finalize( stmt );
sqlite3_prepare_v2( db, query, -1, &stmt, 0 );
if ( sqlite3_exec( db, "BEGIN TRANSACTION", 0, 0, &errorMessage ) != SQLITE_OK ) {
cerr << "SQLite BEGIN TRANSACTION failed! Query was: " << endl << query << endl;
cerr << "Error was: " << errorMessage << endl;
sqlite3_free( errorMessage );
return false;
}
cout << "Finished trixel " << trixel << endl;
}
}
if ( sqlite3_exec( db, "END TRANSACTION", 0, 0, &errorMessage ) != SQLITE_OK ) {
cerr << "SQLite INSERT INTO failed! Query was: " << endl << query << endl;
cerr << "Error was: " << errorMessage << endl;
sqlite3_free( errorMessage );
return false;
}
sqlite3_finalize( stmt );
return true;
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment