Problem
Need to migrate data from Teradata system SRC (i.e. SouRCe) to Teradata system TGT (i.e. TarGeT).
Approach
Create a pair of named pipes called $SRC_FIFO and $TGT_FIFO.
Link these together by cat'ing $SRC_FIFO and outputting into $TGT_FIFO and running in the background like this:
cat $SRC_FIFO > $TGT_FIFO &
The Producer for $SRC_FIFO is the arcmain process extracting data from SRC.
The Consumer for $SRC_FIFO is the cat command above i.e. cat $SRC_FIFO
The Producer for $TGT_FIFO is the input to $TGT_FIFO from the cat command above i.e. > $TGT_FIFO.
The Consumer for $TGT_FIFO is the arcmain process copying data into TGT.
Supporting Files
A single file is required with a list of tables and their associated databases in the SRC system with the following format:
databaseX.tableA
databaseY.tableB
...
databaseZ.tableC
Note in this version - there should not be no whitespace on any line in the file.
In the code, this file is referenced as $ARC_OBJECT_FILE.
Command line
To run the code, use something like:
./mig_test2dev.ksh ARC_OBJ_LIST=/tmp/example_archive_file_list 2>&1 | tee mig_test2dev.log
where the list of objects to be migrated is in the file /tmp/example_archive_file
Code
The code can be found at the end of this post.
Potential Problems
There is a reasonable desire to improve the throughput of both arcmain processes by increasing the SESSIONS=NN argument. I have had "general communication" errors when setting this argument to greater values.
Future improvements
This script was coded quickly and there are parts that need improvements - esp to make it generic for running any environment.
- Allow whitespace in the file with the list of database tables (see example below)
- Add break-point to a staging area to support migration across remote sites
- Add compression support to arcmain procedures
- Add chunking of arcmain archives (breaking into xGB chunks)
- Adding copy across a network via SSH
- Add check to determine whether the db.table already exists in the TGT system
- If so, allow option of suffixing with date YYYYMMDD or NNN version number
- GUI - there is already a product to do this - so probably not
Use and Support
Happy for the code to be used (at your own risk).
There is no support for this code - though would be happy to assist if able.
Code Listing
$ cat mig_test2dev.ksh
#!/bin/ksh
# Program: mig_test2dev.ksh
# Description: Teradata data migration script.
# Moves data between TD system SRC (ie SouRCe) and TD system TGT (ie TarGeT)
# Version: 1.0 gml 31-May-2011 Initial version
# ---------------------------------------------------------------------------------------
err_msg() {
echo "ERROR: in $PRG - $1"
echo "usage: mig_test2dev.ksh ARC_OBJ_LIST="
echo "example: ./mig_test2dev.ksh ARC_OBJ_LIST=/tmp/example_archive_file_list 2>&1 | tee mig_test2dev.log"
exit 1
}
# Remove remnant FIFOs
cleanup_fifo() {
if [ -n "$DEBUG" ]
then
set -x
fi
SRC_OR_TGT=$1
# clean up existing ARC_FIFO
if [ -p "${ARC_FIFO}_${SRC_OR_TGT}" ]
then
rm ${ARC_FIFO}_${SRC_OR_TGT}
fi
}
# create the arcmain scripts
create_arcmain_script() {
if [ -n "$DEBUG" ]
then
set -x
fi
SRC_OR_TGT=$1
THE_ARCMAIN_SCRIPT=${ARCMAIN_SCRIPT}_${NOW}_${SRC_OR_TGT}.arc
TMP_ARC_SCRIPT=${ARC_OBJ_LIST}.$$ # To create a temp script file
THE_ARC_FIFO=${ARC_FIFO}_${SRC_OR_TGT} # The FIFO is named in the arcmain script (instead of a filename)
if [ "$SRC_OR_TGT" = "SRC" ]
then
LOGON_STR="${TD_SRC}/${TD_SRC_LOGON}"
ARC_CMD="ARCHIVE"
cat ${ARC_OBJ_LIST} | awk -F'.' ' { printf("(%c%s%c.%c%s%c),\n",34,$1,34,34,$2,34) } ' > ${TMP_ARC_SCRIPT}
else # [ "$SRC_OR_TGT" = "TGT" ]
LOGON_STR="${TD_TGT}/${TD_TGT_LOGON}"
ARC_CMD="COPY"
cat ${ARC_OBJ_LIST} | awk -F'.' -v v_tgt_db=$TGT_DB -v v_tgt_tab_suffix=$TGT_TAB_SUFFIX ' { printf("(%c%s%c.%c%s%s%c) (FROM(%c%s%c.%c%s%c)),\n",34,v_tgt_db,34,34,$2,v_tgt_tab_suffix,34,34,$1,34,34,$2,34) } ' > ${TMP_ARC_SCRIPT}
fi
# build the arcmain_script from the arcmain_script_master
if [ -n "${TMP_ARC_SCRIPT}" ]
then
cat <${THE_ARCMAIN_SCRIPT}
logon ${LOGON_STR};
${ARC_CMD} DATA TABLES
EOF
cat ${TMP_ARC_SCRIPT} >> ${THE_ARCMAIN_SCRIPT}
cat <>${THE_ARCMAIN_SCRIPT}
RELEASE LOCK,
FILE=${THE_ARC_FIFO};
LOGOFF;
EOF
rm ${TMP_ARC_SCRIPT}
else
err_msg "No TMP_ARC_SCRIPT (${TMP_ARC_SCRIPT}) produced"
fi
}
# Main Program
# evaluate any arguments like X=A Y=B ... Z=C
eval $@
# if the DEBUG flag is set, then show the result/impact of every line
if [ -n "$DEBUG" ]
then
set -x
fi
# set up variables
PRG=`basename $0`
NOW=`date +"%Y%m%d%H%M%S"` # used (with $$) to allow multiple executions of this
TD_DEV=10.172.6.41 # Dev TD system
TD_TEST=10.172.6.71 # Test TD system
TD_SRC=${TD_SRC:-${TD_TEST}}
TD_TGT=${TD_TGT:-${TD_DEV}}
TD_SRC_LOGON=${TD_SRC_LOGON:-"ARCUSER1,xxxx"}
TD_TGT_LOGON=${TD_TGT_LOGON:-"ARCUSER1,yyyy"}
TD_SRC_LOGON_STR="${TD_SRC}/${TD_SRC_LOGON}"
TD_TGT_LOGON_STR="${TD_TGT}/${TD_TGT_LOGON}"
TGT_DB=${TGT_DB:-"ST3NF03_DB01"}
TGT_TAB_SUFFIX=${TGT_TAB_SUFFIX:-"001"} # use suffix to avoid overwriting - needs to be more sophisticated
if [ -z "${ARC_OBJ_LIST}" ]
then
err_msg "The ARC_OBJ_LIST argument is missing"
fi
if [ ! -f "${ARC_OBJ_LIST}" ]
then
err_msg "ARC_OBJ_LIST (${ARC_OBJ_LIST}) file does not exist"
fi
HOME_DIR=/home/glourei1 # my home dir
SCRIPT_DIR=${HOME_DIR}/bin # script dir
BASE_ARC_OBJ_LIST=`basename ${ARC_OBJ_LIST}`
LOCK_FILE=$SCRIPT_DIR/${PRG}_${BASE_ARC_OBJ_LIST}.lck # lock file to ensure only one of these scripts runs at a time
# add - SRC_/TGT_ARC_FIFO_DIR
ARC_FIFO=FIFO
SRC_ARC_FIFO=${ARC_FIFO}_SRC
TGT_ARC_FIFO=${ARC_FIFO}_TGT
ARCMAIN_SCRIPT=$SCRIPT_DIR/arcmain_script
SRC_ARCMAIN_SCRIPT=${ARCMAIN_SCRIPT}_${NOW}_SRC.arc
TGT_ARCMAIN_SCRIPT=${ARCMAIN_SCRIPT}_${NOW}_TGT.arc
#let Teradata decide - was getting errors using high numbers
#SRC_NUM_SESSIONS="SESSIONS=8"
#TGT_NUM_SESSIONS="SESSIONS=4"
# This ensures only one of this processes runs at a time
if [ -f $LOCK_FILE ]
then
echo "ERROR: Already running $PRG"
echo " Protected by the lock file $LOCK_FILE"
exit 1
else
touch $LOCK_FILE
fi
# cleanup legacy fifos if required
cleanup_fifo SRC
cleanup_fifo TGT
# create the SRC and TGT arcmain scripts
create_arcmain_script SRC
create_arcmain_script TGT
# Set up the FIFOs
# arcmain ARCHIVE script writes as the Producer part of the SRC_ARC_FIFO
# which is Consumed by the cat below and written as the Producer to the
# TGT_ARC_FIFO which is Consumed by the arcmain COPY into the Target (TGT) database
mkfifo $SRC_ARC_FIFO
mkfifo $TGT_ARC_FIFO
cat ${SRC_ARC_FIFO} > ${TGT_ARC_FIFO} &
# run the arcmain archive
echo "Starting source arcmain ..."
arcmain $SRC_NUM_SESSIONS <$SRC_ARCMAIN_SCRIPT && SRC_RC=$? && [ $SRC_RC -ne 0 ] && err_msg "arcmain archive failed [SRC_RC=${SRC_RC}]" &
sleep 30
# run the arcmain copy/restore
echo "Starting target arcmain ..."
arcmain $TGT_NUM_SESSIONS <$TGT_ARCMAIN_SCRIPT && TGT_RC=$? && [ $TGT_RC -ne 0 ] && err_msg "arcmain copy/restore failed [TGT_RC=${TGT_RC}]" &
# need to wait till all the background processes have completed
wait
# clean up
rm $LOCK_FILE
# all's well - fin
exit 0
No comments:
Post a Comment