konvertierung von einem xml-format in ein anderes xml-format

  • hallo alle miteinander!

    meine programmiererfahrungen sind etwas eingerostet, daher macht mir diese aufgabe probleme:

    ich habe XML dokumente in folgendem Format :

    ---------------------------
    <?xml version="1.0" encoding="ISO-8859-1"?>
    <!DOCTYPE DefaultDIAsDEMvolume SYSTEM "DefaultDIAsDEMvolume.dtd">
    <DefaultDIAsDEMvolume NumberOfDocuments="10">
    <DefaultDIAsDEMdocument NumberOfTextUnitsLayers="1">
    <MetaData>
    <Name>DiasdemDocumentID</Name>
    <Content>C:\Dokumente und Einstellungen\Hope\Desktop\DA\Donnerstag\DIAsDEM.workbench21\data\Hoffnung\hopecollect\volume100001.xml:0</Content>
    </MetaData>
    <MetaData>
    <Name>SourceFile</Name>
    <Content>C:\Dokumente und Einstellungen\Hope\Desktop\DA\Donnerstag\DIAsDEM.workbench21\data\samples\de\case2\file10010.txt</Content>
    </MetaData>
    <OriginalText>Die BANA Immobilien GmbH, Sitz Frankfurt/Main, ist als persönlich haftende Gesellschafterin eingetreten. Sie ist von der Vertretung der Gesellschaft ausgeschlossen.</OriginalText>
    <TextUnitsLayer TextUnitsLayerID="0" TextUnitsDescription="Algorithm: HEURISTIC_SENTENCE_IDENTIFIER">
    <OriginalTextUnits>
    <OriginalTextUnit TextUnitID="0" BeginIndex="0" EndIndex="104">Die BANA Immobilien GmbH, Sitz Frankfurt/Main, ist als persönlich haftende Gesellschafterin eingetreten.</OriginalTextUnit>
    <OriginalTextUnit TextUnitID="1" BeginIndex="105" EndIndex="164">Sie ist von der Vertretung der Gesellschaft ausgeschlossen.</OriginalTextUnit>
    </OriginalTextUnits>
    <ProcessedTextUnits>
    <ProcessedTextUnit TextUnitID="0">
    <NeRef NeID="2" />
    , persönlich haftend Gesellschafterin eintreten .
    </ProcessedTextUnit>
    <ProcessedTextUnit TextUnitID="1">Vertretung ausschließen .</ProcessedTextUnit>
    </ProcessedTextUnits>
    <RollbackTextUnits RollbackID="0">
    <ProcessedTextUnit TextUnitID="0">
    <NeRef NeID="2" />
    , persönlich haftend Gesellschafterin eintreten .
    </ProcessedTextUnit>
    <ProcessedTextUnit TextUnitID="1">Vertretung ausschließen .</ProcessedTextUnit>
    </RollbackTextUnits>
    <NamedEntities>
    <NamedEntity NeID="0" NeType="organization">BANA Immobilien GmbH</NamedEntity>
    <NamedEntity NeID="1" NeType="place">Frankfurt / Main</NamedEntity>
    <NamedEntity NeID="2" NeType="company">2|null|company|null|BANA Immobilien GmbH|null|null|Frankfurt / Main|null</NamedEntity>
    </NamedEntities>
    </TextUnitsLayer>
    </DefaultDIAsDEMdocument>
    </DefaultDIAsDEMvolume>
    ---------------------------

    brauche aber zur weiteren bearbeitung ein anderes xml-Format:
    ---------------------------
    <?xml version = '1.0' encoding = 'UTF-8'?>
    <ParDoc stage="1" source="F:\in_mb\test5\web_parta\news_story.cfm_StoryID=10000120&amp;full=1&amp;print=1.html" content-domain="testtesttest">
    <front>
    <downloadtime>Sat, 23 Apr 2005 09:41:37 GMT</downloadtime>
    <modtime>Thu, 21 Apr 2005 22:56:04 GMT</modtime>
    <ontologyVersion></ontologyVersion>
    <title>
    <tok id="t1" pos="NIL" lem="american" lookup="NIL" orth="capitalized" zone="title" sepAfter=" ">American</tok>
    <tok id="t2" pos="NIL" lem="foundation" lookup="NIL" orth="capitalized" zone="title" sepAfter=" ">Foundation</tok>
    <tok id="t3" pos="NIL" lem="for" lookup="NIL" orth="capitalized" zone="title" sepAfter=" ">For</tok>
    <tok id="t4" pos="NIL" lem="urologic" lookup="NIL" orth="capitalized" zone="title" sepAfter=" ">Urologic</tok>
    <tok id="t5" pos="NIL" lem="disease" lookup="NIL" orth="capitalized" zone="title" sepAfter=" ">Disease</tok>
    <tok id="t6" pos="NIL" lem="(" lookup="NIL" orth="bracket" zone="title" sepAfter="AFUD">(</tok>
    <tok id="t7" pos="NIL" lem="afud" lookup="NIL" orth="uppercase" zone="title" sepAfter=")">AFUD</tok>
    <tok id="t8" pos="NIL" lem=")" lookup="NIL" orth="bracket" zone="title" sepAfter=" ">)</tok>
    <tok id="t9" pos="NIL" lem="release" lookup="NIL" orth="capitalized" zone="title" sepAfter=":">Release</tok>
    <tok id="t10" pos="NIL" lem=":" lookup="NIL" orth="punct" zone="title" sepAfter=" ">:</tok>
    <tok id="t11" pos="NIL" lem="boxers" lookup="NIL" orth="capitalized" zone="title" sepAfter=" ">Boxers</tok>
    <tok id="t12" pos="NIL" lem="or" lookup="NIL" orth="capitalized" zone="title" sepAfter=" ">Or</tok>
    <tok id="t13" pos="NIL" lem="briefs" lookup="NIL" orth="capitalized" zone="title" sepAfter="?">Briefs</tok>
    <tok id="t14" pos="NIL" lem="?" lookup="NIL" orth="punct" zone="title" sepAfter=" ">?</tok>
    <tok id="t15" pos="NIL" lem="." lookup="NIL" orth="punct" zone="title" sepAfter=".">.</tok>
    <tok id="t16" pos="NIL" lem="." lookup="NIL" orth="punct" zone="title" sepAfter=".">.</tok>
    <tok id="t17" pos="NIL" lem="." lookup="NIL" orth="punct" zone="title" sepAfter="\n">.</tok>
    </title>
    </front>
    <body>
    <sec>


    <p>
    <s id="sen1">
    <tok id="t18" pos="NIL" lem="american" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">American</tok>
    <tok id="t19" pos="NIL" lem="foundation" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Foundation</tok>
    <tok id="t20" pos="NIL" lem="for" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">For</tok>
    <tok id="t21" pos="NIL" lem="urologic" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Urologic</tok>
    <tok id="t22" pos="NIL" lem="disease" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Disease</tok>
    <tok id="t23" pos="NIL" lem="(" lookup="NIL" orth="bracket" zone="body" sepAfter="AFUD">(</tok>
    <tok id="t24" pos="NIL" lem="afud" lookup="NIL" orth="uppercase" zone="body" sepAfter=")">AFUD</tok>
    <tok id="t25" pos="NIL" lem=")" lookup="NIL" orth="bracket" zone="body" sepAfter="Release">)</tok>
    <tok id="t26" pos="NIL" lem="release" lookup="NIL" orth="capitalized" zone="body" sepAfter=":">Release</tok>
    <tok id="t27" pos="NIL" lem=":" lookup="NIL" orth="punct" zone="body" sepAfter=" ">:</tok>
    <tok id="t28" pos="NIL" lem="boxers" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Boxers</tok>
    <tok id="t29" pos="NIL" lem="or" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Or</tok>
    <tok id="t30" pos="NIL" lem="briefs" lookup="NIL" orth="capitalized" zone="body" sepAfter="?">Briefs</tok>
    <tok id="t31" pos="NIL" lem="?" lookup="NIL" orth="punct" zone="body" sepAfter=" ">?</tok>
    <tok id="t32" pos="NIL" lem="just" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Just</tok>
    <tok id="t33" pos="NIL" lem="one" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">One</tok>
    <tok id="t34" pos="NIL" lem="of" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Of</tok>
    <tok id="t35" pos="NIL" lem="the" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">The</tok>
    <tok id="t36" pos="NIL" lem="questions" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Questions</tok>
    <tok id="t37" pos="NIL" lem="facing" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Facing</tok>
    <tok id="t38" pos="NIL" lem="the" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">The</tok>
    <tok id="t39" pos="NIL" lem="millions" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Millions</tok>
    <tok id="t40" pos="NIL" lem="of" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Of</tok>
    <tok id="t41" pos="NIL" lem="american" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">American</tok>
    <tok id="t42" pos="NIL" lem="men" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Men</tok>
    <tok id="t43" pos="NIL" lem="struggling" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Struggling</tok>
    <tok id="t44" pos="NIL" lem="with" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">With</tok>
    <tok id="t45" pos="NIL" lem="infertility" lookup="NIL" orth="capitalized" zone="body" sepAfter="\n">Infertility</tok>
    </s>
    </p>


    <p>
    <s id="sen2">
    <tok id="t46" pos="NIL" lem="american" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">American</tok>
    <tok id="t47" pos="NIL" lem="foundation" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Foundation</tok>
    <tok id="t48" pos="NIL" lem="for" lookup="NIL" orth="lowercase" zone="body" sepAfter=" ">for</tok>
    <tok id="t49" pos="NIL" lem="urologic" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Urologic</tok>
    <tok id="t50" pos="NIL" lem="disease" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Disease</tok>
    <tok id="t51" pos="NIL" lem="sheds" lookup="NIL" orth="lowercase" zone="body" sepAfter=" ">sheds</tok>
    <tok id="t52" pos="NIL" lem="light" lookup="NIL" orth="lowercase" zone="body" sepAfter=" ">light</tok>
    <tok id="t53" pos="NIL" lem="on" lookup="NIL" orth="lowercase" zone="body" sepAfter=" ">on</tok>
    <tok id="t54" pos="NIL" lem="male" lookup="NIL" orth="lowercase" zone="body" sepAfter=" ">male</tok>
    <tok id="t55" pos="NIL" lem="infertility" lookup="NIL" orth="lowercase" zone="body" sepAfter="\n">infertility</tok>
    </s>
    </p>
    </sec>
    </body>
    <back>
    <ParLex/>
    <ParCon/>
    </back>
    </ParDoc>

    ---------------------------

    sorry, dass das posting so groß ist.
    kann mir jemand helfen und mir erklären, wie ich aus dem einen format das andere bekomme? wenn möglich detailliert?
    jeder tipp ist hilfreich !
    danke schon mal im voraus.

  • benutze erstmal die code tags im forum; so bekommt man das kaum gelesen.

    dazu erläutere mal, was die beiden datensätze gemeinsam haben und welche daten gleich sind.

    Dann noch die frage, was du zur verfügung hast: PHP? Perl? bitte mit versionsnummern.